# Training of Stable Diffusion HF models using DreamBooth with Low-Rank Adaptation of Large Language Models (LoRA)

## Setup

In [1]:
### Choose where the notebook runs: on Colab or locally

# Accepted values are "colab" and "locally"; the setup cell below derives START_DIR from this.
# run_from = "locally"
run_from = "colab"

In [2]:
import os

# Resolve the project directory for the selected environment and make it the
# working directory, so the relative paths used by the shell commands resolve.
if run_from == "colab":
    # On Colab the project lives on Google Drive, which has to be mounted first
    from google.colab import drive
    drive.mount('/content/drive')
    START_DIR = '/content/drive/MyDrive/Colab_Notebooks/Fine_tuning_HF_SD_model_using_DreamBooth_LoRA'
elif run_from == "locally":
    START_DIR = "."
else:
    # Fail here with a clear message instead of a NameError on START_DIR below
    raise ValueError(f'Unknown run_from value {run_from!r}; expected "colab" or "locally"')

os.chdir(START_DIR)

Mounted at /content/drive


In [3]:
# List the contents of the current working directory
!ls

custom_models
default_config.yaml
download_sdiff_model.py
Fine_tuning_HF_SD_model_using_DreamBooth_LoRA.ipynb
HF_models
initial_setup.sh
prompt_generation_testing.tsv
requirements.txt
Screencast_Customized_image_generation.mp4
sdiff_finetuning.html
sdiff_finetuning.py
sdiff_finetuning_template.py
SDiff_finetuning_testing.ipynb
sdiff_inference_basic.py
sdiff_inference_demo.py
sdiff_inference.html
sdiff_inference_pro.py
sdiff_inference.py
sdiff_inference_template.py
SDiff_inference_testing.ipynb
train_dreambooth_lora.py
Users


In [4]:
# Install the packages listed in requirements.txt (%pip targets the kernel's own environment)
%pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch==1.13.1
  Downloading torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl (887.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.4/887.4 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.17.1
  Downloading accelerate-0.17.1-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.8/212.8 KB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.14.1
  Downloading torchvision-0.14.1-cp39-cp39-manylinux1_x86_64.whl (24.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.2/24.2 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting diffusers==0.14.0
  Downloading diffusers-0.14.0-py3-none-any.whl (737 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m737.4/737.4 KB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transfo

In [None]:
# Write a basic Accelerate configuration; the recorded output shows the call
# returning the path of the default_config.yaml file.
from accelerate.utils import write_basic_config
write_basic_config()

  from .autonotebook import tqdm as notebook_tqdm


PosixPath('/home/drphyl/.cache/huggingface/accelerate/default_config.yaml')

In [None]:
### Optional: log in to Hugging Face so that the fine-tuned model can be uploaded ###
# To upload, run "!accelerate launch train_dreambooth_lora.py ..." with the [--push_to_hub] flag

!huggingface-cli login




In [5]:
# Show the GPU visible to this runtime together with its driver version and memory usage
!nvidia-smi

Thu Apr  6 14:10:21 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
def slugify(value, allow_unicode=False):
    '''
    Turn arbitrary text into a string that is safe to use as a file or directory name.

    Taken from https://github.com/django/django/blob/master/django/utils/text.py
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.

    Parameters:
        value: text to convert; non-string values are converted with str() first.
        allow_unicode: keep non-ASCII word characters instead of dropping them.

    Returns:
        The slug as a str (may be empty if nothing usable remains).
    '''
    # The imports stay local so the function is self-contained, but they must come
    # after the docstring: a string that follows a statement is not a docstring.
    import re
    import unicodedata

    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # Decompose accented characters, then drop everything that is not ASCII
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')

### Choose the models

In [7]:
# Base model selection: keep exactly one hf_model assignment uncommented.

### models with Resolution 512x512 ###
hf_model="stabilityai/stable-diffusion-2-1-base"
# hf_model="CompVis/stable-diffusion-v1-4"
# hf_model="runwayml/stable-diffusion-v1-5"
# hf_model="dreamlike-art/dreamlike-photoreal-2.0"
          

### models with Resolution 768x768 ###
# hf_model="stabilityai/stable-diffusion-2-1"

## Model customization: Finetuning and Training

In [None]:
# Fine-tuning inputs: user id, training prompt and image resolution (set by hand here)
user_id = ""
training_prompt = "jelly in a glass"
resolution = "512"

# set sdiff_model based on the resolution
if resolution == '768':
    sdiff_model = "stable-diffusion-2-1"
elif resolution == '512':
    sdiff_model = "stable-diffusion-2-1-base"
else:
    # Stop here: carrying on would either raise a NameError on sdiff_model
    # (fresh kernel) or silently reuse a value left over from an earlier run.
    raise ValueError(f"Invalid resolution {resolution!r}; expected '512' or '768'")


# convert training prompt to a valid directory name
prompt_as_path = slugify(training_prompt)

# construct file paths
MODEL_PATH = os.path.join(START_DIR, "HF_models", sdiff_model)

# USER_DIR = os.path.join(START_DIR, "Users", user_id)
USER_DIR = START_DIR

# Directory that receives the LoRA weights; training images are read from a
# per-prompt sub-directory of training_data
LORA_PATH = os.path.join(USER_DIR, 'custom_models')
TRAINING_DATA_PATH = os.path.join(USER_DIR, "training_data", prompt_as_path)
LR_SCHEDULER = "constant"

# print some debug info
print("*** FINETUNING ***")
print("User: ", user_id)
print("Training Prompt: ", training_prompt)
print("Stable Diffusion Model: ", sdiff_model)
print("Loading training images from: ", TRAINING_DATA_PATH)

*** FINETUNING ***
User:  
Training Prompt:  jelly in a glass
Stable Diffusion Model:  stable-diffusion-2-1-base
Loading training images from:  ./training_data/jelly-in-a-glass


In [None]:
# List the contents of the current working directory
!ls

accelerate_CLI.txt
default_config.yaml
download_sdiff_model.py
Fine_tuning_HF_SD_model_using_DreamBooth_LoRA.ipynb
HF_models
initial_setup.sh
output
python3.9.16
requirements.txt
sdiff_finetuning.py
sdiff_finetuning_template.py
SDiff_finetuning_testing.ipynb
sdiff_inference_basic.py
sdiff_inference_demo.py
sdiff_inference_pro.py
sdiff_inference_template.py
SDiff_inference_testing.ipynb
Stable-Diffusion-sandbox-backup
static
templates
train_dreambooth_lora.py
Users


In [None]:
# Launch train_dreambooth_lora.py with command line parameters
import subprocess

# Arguments are passed as a list (no shell involved), so prompts containing
# spaces need no extra quoting.
training_run = subprocess.run(['accelerate', 'launch', 'train_dreambooth_lora.py',
    f'--pretrained_model_name_or_path={MODEL_PATH}',
    f'--instance_data_dir={TRAINING_DATA_PATH}',
    f'--output_dir={LORA_PATH}',
    f'--instance_prompt={training_prompt}',
    f'--resolution={resolution}',
    '--train_batch_size=1',
    '--gradient_accumulation_steps=1',
    '--checkpointing_steps=250',
    '--learning_rate=1e-4',
    f'--lr_scheduler={LR_SCHEDULER}',
    '--lr_warmup_steps=0',
    '--max_train_steps=500',
    f'--validation_prompt={training_prompt}',
    '--validation_epochs=50',
    '--seed=0',
    '--gradient_checkpointing',
    '--use_8bit_adam',
    '--enable_xformers_memory_efficient_attention'
], capture_output=True, text=True)

# The output is captured rather than streamed, so show it explicitly and stop on
# failure; otherwise later cells would operate on missing or stale weights.
print(training_run.stdout)
if training_run.returncode != 0:
    print(training_run.stderr)
    raise RuntimeError(f'Training failed with exit code {training_run.returncode}')

In [None]:
# Give the trained LoRA weights a file name built from the prompt slug and the base model name
dest_path = os.path.join(LORA_PATH, f'{prompt_as_path}__{sdiff_model}.bin')
source_path = os.path.join(LORA_PATH, 'pytorch_lora_weights.bin')

os.rename(source_path, dest_path)
print('Model saved as ', dest_path)

Model saved as  ./custom_models/jelly-in-a-glass__stable-diffusion-2-1-base.bin


In [None]:
### Running via CLI ###

# if necessary, change RESOLUTION and PUSH_TO_HUB 

!accelerate launch train_dreambooth_lora.py \
  --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
  --instance_data_dir="training_data/cat_in_style_of_Vasya_Lozhkin" \
  --output_dir="custom_models" \
  --instance_prompt="cat in style of Vasya Lozhkin" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --checkpointing_steps=250 \
  --learning_rate=1e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=500 \
  --validation_prompt="Vasya Lozhkin" \
  --validation_epochs=50 \
  --seed="0" \
# --push_to_hub \
  --gradient_checkpointing \
  --use_8bit_adam \
  --enable_xformers_memory_efficient_attention

2023-03-07 11:39:11.333615: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-03-07 11:39:11.335765: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-03-07 11:39:17.141568: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-03-07 11:39:17.141674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7';

## Inference

In [21]:
# Define local path where Diffusers model will be stored
import os
sdiff_model = hf_model.split('/')[-1]
MODEL_PATH = os.path.join(START_DIR, "HF_models", sdiff_model)
model_name = sdiff_model

In [9]:
# Per-user directories are disabled here: everything is stored directly under START_DIR.
# USER_DIR = os.path.join(START_DIR, "Users", user_id)
USER_DIR = START_DIR

In [10]:
### To run on GPU
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
import torch

print("MODEL: ", sdiff_model)
# Load the pipeline from the local model directory, requesting float16 weights
pipe = DiffusionPipeline.from_pretrained(MODEL_PATH, torch_dtype=torch.float16)
# Swap in DPMSolverMultistepScheduler, built from the config of the pipeline's current scheduler
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention() 


MODEL:  stable-diffusion-2-1-base




In [None]:
### To run on CPU only (no GPU available)
# Alternative to the GPU cell: run one of the two, as both assign `pipe`.
from diffusers import DiffusionPipeline
# from diffusers import DPMSolverMultistepScheduler
import torch

print("MODEL: ", sdiff_model)
# No torch_dtype, no move to "cuda" and no xformers call here, unlike the GPU cell
pipe = DiffusionPipeline.from_pretrained(MODEL_PATH)
# pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

The cache for model files in Diffusers v0.14.0 has moved to a new location. Moving your existing cached models. This is a one-time operation, you can interrupt it or run it later by calling `diffusers.utils.hub_utils.move_cache()`.


MODEL:  stabilityai/stable-diffusion-2-1-base




In [32]:
### Path to custom model ###

# Alternative weight files; keep exactly one lora_model assignment uncommented
# lora_model = "jelly_lora_weights_stable-diffusion-2-1-base.bin"
# lora_model = "jelly_lora_weights_stable-diffusion-v1-4.bin"

# lora_model = "cat_lozhkin_lora_weights_stable-diffusion-v1-4.bin"
# lora_model = "cat_lozhkin_lora_weights_stable-diffusion-v1-5.bin"

lora_model = "jelly-in-a-glass__stable-diffusion-2-1-base.bin"

# Note: LORA_PATH is the full path of the weight file here, whereas the
# fine-tuning section uses the same name for the custom_models directory.
LORA_PATH = os.path.join(USER_DIR, "custom_models", lora_model)

# If the LoRA finetuned model is specified and exists, load the attention processes from it
# (if the file is missing, nothing is loaded and model_name keeps its previous value)
if lora_model is not None and os.path.exists(LORA_PATH):
    model_name = lora_model
    pipe.unet.load_attn_procs(LORA_PATH)

In [33]:
# Display the model name that is used in the output paths
model_name

'jelly-in-a-glass__stable-diffusion-2-1-base.bin'

In [11]:
# Display the pipeline configuration (components and their classes)
pipe

StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.14.0",
  "feature_extractor": [
    "transformers",
    "CLIPFeatureExtractor"
  ],
  "requires_safety_checker": false,
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

### For a single set of parameters



In [None]:
# create a generator with seed 42 for reproducibility

# GPU variant
generator = torch.Generator(device="cuda").manual_seed(42)

# CPU variant: use this line instead when the pipeline runs on CPU
# generator = torch.Generator(device="cpu").manual_seed(42)


In [None]:

### Generate image ###

# Interactive alternative: ask for the values instead of hard-coding them
# prompt = input("Prompt: ")
# negative_prompt = input("Negative prompt: ")
# num_images = int(input("How many images would you like to generate? "))


# Hard-coded inputs for a single generation run
# prompt = "jelly in a glass" 
prompt = "cat in style of Vasya Lozhkin #cpu" 
negative_prompt = "low poly, low-poly, 3d, disfigured, kitsch, oversaturated, grain, low-res, Deformed, blurry, poorly drawn face, mutation, mutated, extra limb, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus,  poorly drawn, pixel-art, pixelated"
num_images = 1

In [None]:
# cross_attention_kwargs={"scale": 0.0} means LoRA weights will not be used;
# cross_attention_kwargs={"scale": 1.0} means that only the LoRA fine-tuned weights will be used. 
# Values between 0 and 1 will interpolate between the two weights.

cross_attention = 1.0
# Tag used as the last component of the output directory: "basic_model" only
# when the scale is exactly 0, "lora_model" for every other value
finetuning_tag = "basic_model" if cross_attention == 0 else  "lora_model"

In [None]:
# Generate `num_images` images for `prompt` in a single pipeline call
Images = pipe(
    prompt,
    ### For custom LoRA model
    cross_attention_kwargs={"scale": cross_attention},
    num_inference_steps=25,
    generator=generator,
    num_images_per_prompt = num_images,
    negative_prompt = negative_prompt,
    ).images

100%|██████████| 25/25 [23:51<00:00, 57.25s/it]


In [None]:
### Save all generated images ###

# Images go to output/<model>/<prompt slug>/<tag>/<index>.png
prompt_as_path = slugify(prompt)
output_path = os.path.join("output", model_name, prompt_as_path, finetuning_tag)
try:
    os.makedirs(output_path, exist_ok = True)
except OSError as error:
    # Report the directory that failed (the original referenced an undefined name here)
    print("Directory '%s' can not be created" % output_path)
else:
    # Only try to save when the target directory is available
    for idx, im in enumerate(Images):
        im.save(f"{output_path}/{idx}.png")

In [None]:
# Manually displays all images (on Colab)
from IPython.display import display

for idx, im in enumerate(Images):
    display(im)

NameError: name 'Image' is not defined

### Batch image generation for parameter testing

In [34]:
# Prompt sets for batch testing: keep exactly one `prompts` assignment uncommented
# prompts= ["Futuristic underwater building shaped like a rounded pyramid"]
# prompts = ["jelly in shape of Taj Mahal", "jelly in a glass in shape of Taj Mahal"]
prompts = ["pirate on a grey horse in a starry sky", "two girls in white dresses riding bycicles"]


# prompts = ["pikachu in form of cat in style of Vasya Lozhkin", "cat in style of Vasya Lozhkin #24", "Michael Jackson in form of cat in style of Vasya Lozhkin"]
# prompts = ["cat", "cat in style of Vasya Lozhkin", "Michael Jackson in style of Vasya Lozhkin"]


# Number of images generated per prompt, and the negative prompt shared by all prompts
num_images = 4
negative_prompt = "low poly, low-poly, 3d, disfigured, kitsch, oversaturated, grain, low-res, Deformed, blurry, poorly drawn face, mutation, mutated, extra limb, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus,  poorly drawn, pixel-art, pixelated"



In [35]:
# Display the prompts that the batch run will use
prompts

['jelly in a glass', 'Taj Mahal in form of jelly in a glass']

In [37]:
# Sweep over prompts, diffusion step counts and LoRA scales, saving every image to
# output/quality_diffusion_steps/<model>/<prompt slug>/<tag>/<index>_<steps>-steps.png
# NOTE(review): the generator is seeded once, so each pipe() call continues the same
# random stream rather than restarting from seed 24 - confirm that this is intended
# for the basic-vs-finetuned comparison.
generator = torch.Generator(device="cuda").manual_seed(24)


for prompt in prompts:
    for num_inference_steps in [50, 75]:
        prompt_as_path = slugify(prompt)
        # Scale 0.0 switches the LoRA weights off, 1.0 applies them fully
        for cross_attention in [0.0, 1.0]:
            Images = pipe(prompt,
                        cross_attention_kwargs={"scale": cross_attention},
                        num_inference_steps=num_inference_steps, generator=generator,
                        num_images_per_prompt = num_images,
                        negative_prompt = negative_prompt
                        ).images
            finetuning_tag = "basic_model" if cross_attention == 0 else  "finetuned_model"
            output_path = os.path.join(USER_DIR, "output", "quality_diffusion_steps", model_name, prompt_as_path, finetuning_tag)
            try:
                os.makedirs(output_path, exist_ok = True)
            except OSError as error:
                # Report the directory that failed (the original referenced an undefined name here)
                print("Directory '%s' can not be created" % output_path)
            else:
                # Only try to save when the target directory is available
                for idx, im in enumerate(Images):
                    im.save(f"{output_path}/{idx}_{num_inference_steps}-steps.png")

Input number of diffusion steps: 75


  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

In [38]:
import pandas as pd
# Load the tab-separated table of test prompts; the final expression displays it
prompts_df = pd.read_csv("prompt_generation_testing.tsv", encoding='UTF-8', sep='\t')
prompts_df

Unnamed: 0,User_input_1,User_input_2,Preprocessing,Response_1,Prompt_for_image_generation
0,"costume, sign, rumor",,,,"A spooky witch costume with a Beware sign, fue..."
1,"memory, people, society",,,,A group of friends reminiscing about old times...
2,"memory, people, society",jelly in a glass,EXTRACT(User_input_2),Jelly served in a glass.,"Jelly served in a glass, people reminisce memo..."
3,"memory, people, society",life is not a bed of roses,EXTRACT(User_input_2),Life is challenging.,A group of individuals struggling to remember ...
4,"birthday, inspection, virus, wife, diamond, ca...",,UNITE(User_input_1),The common idea behind all these things is tha...,"A digital platform for attention, planning, an..."
5,,The taste of wine is an elusive essence. Is it...,EXTRACT(User_input_2),The complexity of wine taste.,"A rich and intricate wine flavor profile, with..."
6,,"Before you spin up your first instance, you'll...",EXTRACT(User_input_2),Setting up local key before instance creation.,A Local Key being Configured for Instance Crea...
7,,Cyberpunk game where the player has to shoot d...,EXTRACT(User_input_2),Dragon bear fights Agent Smith's vehicles unde...,Dragon bear battles Agent Smith's vehicles in ...
8,"brain, computer, interface, mobility, perspect...",New way to read human mind using fMRI scan data,"UNITE(User_input_1), EXTRACT(User_input_2),",All of these things are related to enhancing a...,IMAGE of TECH-ENHANCED HUMAN CAPABILITIES: Bra...


In [39]:
# Replace missing values with empty strings (the cells below pass these values to slugify)
prompts_df = prompts_df.fillna("")

In [44]:
# Try out the directory name for one row: slugs of the first two columns joined
# by "__", cut to at most 255 characters
i = 1
prompt_as_path = (f'{slugify(prompts_df.iloc[i, 0])}__{slugify(prompts_df.iloc[i, 1])}')[:255]

In [45]:
# Display the resulting directory name
prompt_as_path

'memory-people-society__'

In [15]:
# Print every image-generation prompt in full, prefixed with its row position
for i, prompt in enumerate(prompts_df.Prompt_for_image_generation):
    print(i, prompt)

0 A spooky witch costume with a Beware sign, fueling rumors.
1 A group of friends reminiscing about old times in a crowded cafe, evoking memories of their shared experiences and the impact they have had on society.
2 Jelly served in a glass, people reminisce memories, society gathers around.
3 A group of individuals struggling to remember in a challenging society, depicting the complexity of life.
4 A digital platform for attention, planning, and protection of valuable items, events, and education.
5 A rich and intricate wine flavor profile, with layers of complexity and depth, highlighted by subtle notes and nuances.
6 A Local Key being Configured for Instance Creation, with Technical Tools and Cables.
7 Dragon bear battles Agent Smith's vehicles in underwater combat, amidst raging currents and murky depths.
8 IMAGE of TECH-ENHANCED HUMAN CAPABILITIES: Brain-Computer Interface with Mobility & Perspective Expansion, Mind-Reading via fMRI.


In [None]:
### Apply style if necessary
styles = ['painting', 'sketch', 'abstract', 'advert', 'poster', 'anime cartoon']
# Build one prompt per (prompt, style) pair. The original comprehension never
# iterated over `styles`, so `style` was undefined (or left over from another cell).
# Note: re-running this cell applies the styles again to the already styled prompts.
prompts = [f'{prompt}, in style of {style}' for prompt in prompts for style in styles]

In [1]:
# Generate images for every prompt in prompts_df in several image styles, saving them to
# output/prompt_generation/<model>/<style>/<slug of first two columns>/<index>_<steps>-steps.png
generator = torch.Generator(device="cuda").manual_seed(24)
num_inference_steps= int(input("Input number of diffusion steps: "))


for style in ['painting', 'sketch', 'aquarelle']:
    for i, prompt in enumerate(prompts_df.Prompt_for_image_generation):
        # Directory name: slugs of the first two columns joined by "__", at most 255 characters
        prompt_as_path = (f'{slugify(prompts_df.iloc[i, 0])}__{slugify(prompts_df.iloc[i, 1])}')[:255]
        prompt = f'{prompt}, {style} image style'
        Images = pipe(prompt,
                    num_inference_steps=num_inference_steps,
                    generator=generator,
                    num_images_per_prompt = num_images,
                    negative_prompt = negative_prompt
                        ).images
        output_path = os.path.join(USER_DIR, "output", "prompt_generation", model_name, slugify(style), prompt_as_path)
        try:
            os.makedirs(output_path, exist_ok = True)
        except OSError as error:
            # Report the directory that failed (the original referenced an undefined name here)
            print("Directory '%s' can not be created" % output_path)
        else:
            # Only try to save when the target directory is available
            for idx, im in enumerate(Images):
                im.save(f"{output_path}/{idx}_{num_inference_steps}-steps.png")

NameError: name 'torch' is not defined

In [None]:
### Download all generated images as ZIP archive ###
from google.colab import files as FILE
import shutil

# Create the archive next to the output directory and download exactly the file
# that make_archive reports, so the two paths cannot drift apart if the working
# directory ever differs from USER_DIR.
archive_path = shutil.make_archive(os.path.join(USER_DIR, "output_samples"), 'zip', os.path.join(USER_DIR, "output"))
FILE.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>