# **Notebook - Stable Diffusion Lora fine tuning on Vertex AI**

This notebook reproduces all of the code and scripts inline, for reference only. You can also use the standalone code files directly.

## **Architecture:**
- **train.py** : model training file in Docker
- **Dockerfile**
- **cloud-build-config.yaml** : cloud config file used in CLI
- **vertex-ai-config.yaml** : Vertex AI custom job config file used in CLI

In [None]:
%%writefile train.py
import subprocess
import os
import argparse
import re
import torch
from safetensors.torch import save_file

def bin_to_safetensors(output_path):
    """Convert the trained LoRA weights from .bin to .safetensors.

    Reads ``pytorch_lora_weights.bin`` from ``output_path``, renames every key
    (ordered regex rewrites below, then a ``lora_unet_`` prefix) and writes the
    result to ``pytorch_lora_weights.safetensors`` in the same directory.

    Args:
        output_path: Directory containing ``pytorch_lora_weights.bin``.
    """
    # (pattern, replacement) pairs, applied to each key in this order.
    # Raw strings keep the regex escapes valid; literal dots are escaped so
    # they no longer match arbitrary characters.
    key_rewrites = (
        (r'\.processor\.', '_'),
        (r'mid_block\.', 'mid_block_'),
        (r'_lora\.up\.', '.lora_up.'),
        (r'_lora\.down\.', '.lora_down.'),
        (r'\.(\d+)\.', r'_\1_'),
        (r'to_out', 'to_out_0'),
    )

    # NOTE: torch.load unpickles the file, so only use it on checkpoints that
    # come from a trusted training run.
    # map_location='cpu' lets the conversion run on hosts without a GPU.
    checkpoint = torch.load(
        os.path.join(output_path, 'pytorch_lora_weights.bin'),
        map_location='cpu',
    )

    converted = {}
    for key, tensor in checkpoint.items():
        new_key = key
        for pattern, replacement in key_rewrites:
            new_key = re.sub(pattern, replacement, new_key)
        converted['lora_unet_' + new_key] = tensor

    new_lora_name = 'pytorch_lora_weights.safetensors'
    print("Saving " + new_lora_name)
    save_file(converted, os.path.join(output_path, new_lora_name))

def main(args):
    """Run DreamBooth LoRA training, then convert the weights to .safetensors.

    Args:
        args: Parsed CLI namespace providing ``model_name``, ``input_storage``,
            ``output_storage`` and ``prompt``.

    Raises:
        subprocess.CalledProcessError: If the training command exits with a
            non-zero status.
    """
    # train_dreambooth_lora.py is referenced by relative path, so run from the
    # diffusers example directory.
    os.chdir("/root/diffusers/examples/dreambooth")

    # Pass an argument list instead of a shell string: user-supplied values
    # (e.g. a prompt containing quotes, `$` or backticks) reach the training
    # script verbatim and cannot break or inject into the command line.
    cmd = [
        'accelerate', 'launch', 'train_dreambooth_lora.py',
        f'--pretrained_model_name_or_path={args.model_name}',  # e.g. "runwayml/stable-diffusion-v1-5"
        f'--instance_data_dir={args.input_storage}',
        f'--output_dir={args.output_storage}',
        f'--instance_prompt={args.prompt}',
        '--resolution=512',
        '--train_batch_size=1',
        '--use_8bit_adam',
        '--mixed_precision=fp16',
        '--gradient_accumulation_steps=1',
        '--learning_rate=1e-4',
        '--lr_scheduler=constant',
        '--lr_warmup_steps=0',
        '--max_train_steps=400',
    ]

    # check=True: stop here if training failed, rather than falling through to
    # the conversion step and failing on a missing weights file.
    subprocess.run(cmd, check=True)

    # Convert .bin file to .safetensors, to be used in Automatic1111 WebUI
    bin_to_safetensors(args.output_storage)

if __name__ == "__main__":
    # (flag, default value, help text) for every supported CLI option.
    cli_options = (
        ("--model_name", "runwayml/stable-diffusion-v1-5", "bucket_name/model_folder"),
        ("--input_storage", "abc", "/gcs/bucket_name/input_image_folder"),
        ("--output_storage", "abc", "/gcs/bucket_name/output_folder"),
        ("--prompt", "abc", "a photo of XXX"),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    args = parser.parse_args()

    # Echo the effective settings before training starts.
    for setting in (args.model_name, args.input_storage, args.output_storage, args.prompt):
        print(setting)

    main(args)

In [None]:
%%writefile Dockerfile
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Update the package index and install in a single layer, so a cached
# "update" layer is never combined with a newer "install" step.
# apt-get is used because apt's CLI is not intended for scripts.
RUN apt-get update \
  && DEBIAN_FRONTEND=noninteractive apt-get install -y wget git python3 python3-venv python3-pip \
  && rm -rf /var/lib/apt/lists/*

RUN pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117

WORKDIR /root

RUN git clone -b v0.14.0 https://github.com/huggingface/diffusers.git \
  && pip install /root/diffusers \
  && pip install -U -r /root/diffusers/examples/dreambooth/requirements.txt \
  && pip install -U -r /root/diffusers/examples/text_to_image/requirements.txt \
  && pip install -U xformers \
  && pip install -U safetensors

# Work around the bitsandbytes / CUDA library lookup conflict
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
RUN ln -s /usr/local/cuda/lib64/libcudart.so.11.0 /usr/local/cuda/lib64/libcudart.so
RUN pip install -U bitsandbytes --prefer-binary

# Config accelerate
RUN accelerate config default --mixed_precision=fp16

# Installs additional packages as you need.
RUN pip install google-cloud-aiplatform
RUN pip install google-cloud-storage

# Copies the trainer code to the docker image.
COPY train.py /root/train.py

# Sets up the entry point to invoke the trainer.
# "-m train" resolves train.py from the working directory (/root, set above).
ENTRYPOINT ["python3", "-m", "train"]

In [None]:
%%writefile cloud-build-config.yaml
# Cloud Build config: modify the Docker image name and tag below.
# (%%writefile must be the first line of the cell, so this note is kept inside
# the YAML file as a comment.)
steps:
# Build the image from the Dockerfile in the current directory.
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'build', '-t', 'us-central1-docker.pkg.dev/project_id/artifact_registry_name/sd-training:db-lora-v1', '.' ]
# Push the image to Artifact Registry.
- name: 'gcr.io/cloud-builders/docker'
  args: ['push', 'us-central1-docker.pkg.dev/project_id/artifact_registry_name/sd-training:db-lora-v1']
options:
  machineType: 'N1_HIGHCPU_8'
  diskSizeGb: '200'

In [None]:
%%writefile vertex-ai-config.yaml
# Vertex AI custom job config: modify the Docker image name and tag below.
# (%%writefile must be the first line of the cell, so this note is kept inside
# the YAML file as a comment.)
workerPoolSpecs:
  machineSpec:
    machineType: n1-standard-8
    acceleratorType: NVIDIA_TESLA_T4
    acceleratorCount: 1
  replicaCount: 1
  containerSpec:
    # Must be the same image URI that cloud-build-config.yaml builds and pushes.
    imageUri: us-central1-docker.pkg.dev/project_id/artifact_registry_name/sd-training:db-lora-v1

In [None]:
# Build the training image with Cloud Build, using cloud-build-config.yaml.
! gcloud builds submit --config cloud-build-config.yaml .

# Create the Vertex AI custom training job.
# Format of the comma-separated values passed via --args:
# --model_name: Hugging Face repo id, or "/gcs/bucket_name/model_folder". Only models downloaded from HF in the standard diffusers format have been tested; safetensors checkpoints have not been tested.
# --input_storage: /gcs/bucket_name/input_image_folder
# --output_storage: /gcs/bucket_name/output_folder
# --prompt: a photo of XXX
# NOTE: replace the /gcs/sd_lsj/... paths below with your own bucket paths.
! gcloud ai custom-jobs create \
  --region=us-central1 \
  --display-name=sd-lora-training-args-0314-noyh \
  --config=vertex-ai-config.yaml \
  --args="--model_name=runwayml/stable-diffusion-v1-5,--input_storage=/gcs/sd_lsj/input_dog,--output_storage=/gcs/sd_lsj/dog_lora_output,--prompt=a photo of sks dog"


When training has finished, you can load the base model and the LoRA weights for inference.

In [None]:
# Generate an image with the base model plus the fine-tuned LoRA weights.
from diffusers import StableDiffusionPipeline
import torch

model_path = "/somewhere/dog_lora_output"
prompt = "A sks dog in the desert."

# Load the base pipeline in half precision.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
# Attach the trained attention processors to the UNet, then move to the GPU.
pipe.unet.load_attn_procs(model_path)
pipe.to("cuda")

result = pipe(prompt, num_inference_steps=30, guidance_scale=7.5)
image = result.images[0]
image.save("dog_lora.png")

Convert the .bin file to safetensors, for use in the Automatic1111 WebUI.

In [None]:
import os
import re

import torch
from safetensors.torch import save_file

# (pattern, replacement) pairs, applied to each key in this order.
# Raw strings keep the regex escapes valid; literal dots are escaped so they
# no longer match arbitrary characters.
KEY_REWRITES = (
    (r'\.processor\.', '_'),
    (r'mid_block\.', 'mid_block_'),
    (r'_lora\.up\.', '.lora_up.'),
    (r'_lora\.down\.', '.lora_down.'),
    (r'\.(\d+)\.', r'_\1_'),
    (r'to_out', 'to_out_0'),
)

# NOTE: torch.load unpickles the file, so only use it on checkpoints that come
# from a trusted training run.
# map_location='cpu' lets the conversion run on a machine without a GPU.
checkpoint = torch.load('dog_lora_output/pytorch_lora_weights.bin', map_location='cpu')

newDict = {}
for key, tensor in checkpoint.items():
    newKey = key
    for pattern, replacement in KEY_REWRITES:
        newKey = re.sub(pattern, replacement, newKey)
    newDict['lora_unet_' + newKey] = tensor

# The converted file is written to the current working directory.
newLoraName = 'pytorch_lora_weights.safetensors'
print("Saving " + newLoraName)
save_file(newDict, newLoraName)

Alternative: Download the Stable Diffusion model from Hugging Face and save it to GCS

In [None]:
! pip install diffusers
! pip install transformers
! pip install accelerate

In [None]:
import torch
from diffusers import DiffusionPipeline

# Fetch the "fp16" revision of the base model, loaded as float16 tensors.
load_options = dict(revision="fp16", torch_dtype=torch.float16)
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", **load_options
)

# Write the pipeline to the local "model_weights" directory.
pipeline.save_pretrained("model_weights")

In [None]:
# Recursively copy the local model_weights directory to GCS.
# Replace gs://bucket_name/folder with your own destination.
! gsutil cp -r model_weights gs://bucket_name/folder