# Training in Notebook

In [None]:
!pip install -U git+https://github.com/huggingface/diffusers@main

In [None]:
!wget https://raw.githubusercontent.com/huggingface/diffusers/main/examples/dreambooth/train_dreambooth_lora_sdxl.py

In [None]:
# Print the version of the diffusers package that this kernel imports.
import diffusers
print(diffusers.__version__)

In [None]:
from huggingface_hub import snapshot_download

# Download the "diffusers/dog-example" dataset repo into ./dog, skipping .gitattributes.
local_dir = "./dog"
snapshot_download(
    repo_id="diffusers/dog-example",
    repo_type="dataset",
    local_dir=local_dir,
    ignore_patterns=".gitattributes",
)

In [None]:
import glob
from PIL import Image
from diffusers.utils import make_image_grid
import numpy as np

# Load every file in ./dog as a 256x256 thumbnail. Paths are sorted so the grid order is
# stable across runs, and each source file is closed as soon as it has been resized.
images = []
for f in sorted(glob.glob("./dog/*")):
    with Image.open(f) as img:
        images.append(img.resize((256, 256)))

# One row with one column per image, instead of a hard-coded column count that only
# works when the folder holds exactly five files.
make_image_grid(images, rows=1, cols=len(images))

In [None]:
from accelerate.utils import write_basic_config
# Write Accelerate's basic config using the library defaults (no arguments are passed);
# presumably this is what lets the `accelerate launch` cell below run without interactive setup.
write_basic_config()

In [None]:
%%bash

# Launch the downloaded DreamBooth LoRA script on the images in ./dog via `accelerate launch`,
# with ./lora-dog as the output directory.
# NOTE(review): the instance prompt says "sks dog bottle" while the inference prompts later in
# this notebook use "sks dog" - confirm the trailing "bottle" is intended.
# NOTE(review): checkpointing_steps (717) is larger than max_train_steps (500); presumably no
# intermediate checkpoint is written - confirm that is intended.
export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
export INSTANCE_DIR="dog"
export OUTPUT_DIR="lora-dog"
export VAE_PATH="madebyollin/sdxl-vae-fp16-fix"

accelerate launch train_dreambooth_lora_sdxl.py \
  --pretrained_model_name_or_path=$MODEL_NAME  \
  --pretrained_vae_model_name_or_path=$VAE_PATH \
  --instance_data_dir=$INSTANCE_DIR \
  --output_dir=$OUTPUT_DIR \
  --mixed_precision="fp16" \
  --instance_prompt="a photo of a sks dog bottle" \
  --resolution=1024 \
  --train_batch_size=2 \
  --gradient_accumulation_steps=2   \
  --gradient_checkpointing   \
  --learning_rate=1e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=500 \
  --checkpointing_steps=717   \
  --seed="0"

In [None]:
from diffusers import StableDiffusionXLPipeline, AutoencoderKL, DiffusionPipeline
import torch

# VAE loaded separately in float16 and handed to the pipeline below.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# SDXL base pipeline (fp16 variant, safetensors weights) using the VAE loaded above.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)

# Move the pipeline to the GPU; binding the result to `_` keeps the cell from echoing it.
_ = pipe.to("cuda")

In [None]:
# Baseline sample: generated before the LoRA weights are loaded in the next cell.
prompt = "An sks dog in a bucket"

result = pipe(prompt=prompt, num_inference_steps=25)
image = result.images[0]
image

In [None]:
# Load the LoRA weights written by the training cell: pass the output directory as the
# location and the bare file name as weight_name, rather than hiding a path inside weight_name.
pipe.load_lora_weights("lora-dog", weight_name="pytorch_lora_weights.safetensors")

In [None]:
# Same prompt and step count as the baseline cell, now that load_lora_weights has been called on the pipeline.
prompt = "An sks dog in a bucket"

result = pipe(prompt=prompt, num_inference_steps=25)
image = result.images[0]
image

# Build container

In [None]:
# Google Cloud project id; it is interpolated into the bucket name and the Docker image URI
# and passed to aiplatform.init below. Replace the placeholder before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Make this project the active one for the gcloud commands that follow.
! gcloud config set project {PROJECT_ID}

In [None]:
# Region used as the Artifact Registry host prefix in TRAIN_DOCKER_URI and as the
# `location` passed to aiplatform.init.
REGION = "us-central1"  # @param {type: "string"}

In [None]:
# Cloud Storage bucket name (without the gs:// prefix) used for staging, the training
# images and the training output.
# NOTE(review): no visible cell creates this bucket - presumably it must already exist.
BUCKET=f"your-bucket-name-{PROJECT_ID}-unique"

In [None]:
# Local image tag, Artifact Registry repository name, and the full registry URI
# that the image is tagged with and pushed to.
TAG = "sdxl-train:latest"
REPO = "vertexai"
TRAIN_DOCKER_URI = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPO}/{TAG}"

In [None]:
!gcloud artifacts repositories create $REPO --repository-format=docker --location=europe-west4 --description="Docker repository"

In [None]:
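# Before pushing, run the following once in a terminal so Docker can authenticate to
# Artifact Registry (replace $REGION with the value of REGION set in this notebook):
# gcloud auth configure-docker $REGION-docker.pkg.dev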

In [None]:
# Build the image from the Dockerfile in the current directory and tag it locally as $TAG.
!docker build -f Dockerfile . -t $TAG

In [None]:
# Give the local image its full registry name (TRAIN_DOCKER_URI).
!docker tag $TAG $TRAIN_DOCKER_URI

In [None]:
# Push the image to the registry; the same URI is passed as container_uri to the training job below.
!docker push $TRAIN_DOCKER_URI

# Training on Vertex AI

In [None]:
from google.cloud import aiplatform

# Initialise the Vertex AI SDK with the project, region and staging bucket configured above.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f"gs://{BUCKET}",
)

In [None]:
import base64
import glob
import os
from datetime import datetime
from io import BytesIO

import requests
from google.cloud import aiplatform, storage
from PIL import Image

def create_job_name(prefix):
    """Build a job name of the form ``<prefix>-<user>-<YYYYmmdd_HHMMSS>``.

    Args:
        prefix: Leading part of the job name.

    Returns:
        The job name string. ``<user>`` is read from the ``USER`` environment
        variable; when that variable is unset or empty, ``"unknown"`` is used
        so the name never contains the literal text ``None``. The timestamp is
        the current local time.
    """
    user = os.environ.get("USER") or "unknown"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{prefix}-{user}-{timestamp}"


def get_bucket_and_blob_name(filepath):
    """Split a GCS path into its bucket name and blob name.

    The text after the first ``gs://`` is cut at its first ``/``, so
    ``gs://<bucket-name>/<blob-name>`` yields ``(<bucket-name>, <blob-name>)``.
    When nothing follows the bucket name (no ``/``), a 1-tuple holding only
    the bucket name is returned. A path without ``gs://`` raises ``IndexError``.
    """
    pieces = filepath.split("gs://", 1)
    remainder = pieces[1]
    bucket_name, slash, blob_name = remainder.partition("/")
    if slash:
        return (bucket_name, blob_name)
    return (bucket_name,)


def upload_local_dir_to_gcs(local_dir_path, gcs_dir_path):
    """Uploads every file under a local directory to a GCS directory.

    The local directory is walked recursively and each file keeps its path
    relative to ``local_dir_path`` underneath ``gcs_dir_path``.

    Args:
        local_dir_path: Local directory whose files are uploaded.
        gcs_dir_path: Destination of the form ``gs://<bucket-name>/<prefix>``.
    """
    client = storage.Client()
    bucket_name = get_bucket_and_blob_name(gcs_dir_path)[0]
    bucket = client.get_bucket(bucket_name)
    gcs_root = gcs_dir_path.rstrip("/")

    # "**" only descends into sub-directories when recursive=True; without it the
    # pattern behaves like "*" and nested files are silently skipped.
    pattern = os.path.join(local_dir_path, "**")
    for local_file in sorted(glob.glob(pattern, recursive=True)):
        if not os.path.isfile(local_file):
            continue
        # GCS object names always use "/", whatever the local path separator is.
        relative_path = os.path.relpath(local_file, local_dir_path).replace(os.sep, "/")
        gcs_file_path = f"{gcs_root}/{relative_path}"
        _, blob_name = get_bucket_and_blob_name(gcs_file_path)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(local_file)
        print("Copied {} to {}.".format(local_file, gcs_file_path))

In [None]:
# Upload the images that snapshot_download placed in ./dog. A local path that does not exist
# matches no files, so the upload would silently copy nothing.
upload_local_dir_to_gcs("dog", f"gs://{BUCKET}/dreambooth/dog")

In [None]:
# The pre-trained model to be loaded.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Input and output paths, addressed through the /gcs/<bucket> prefix.
# instance_dir must match the destination the images were uploaded to (gs://{BUCKET}/dreambooth/dog).
instance_dir = f"/gcs/{BUCKET}/dreambooth/dog"
output_dir = f"/gcs/{BUCKET}/dreambooth/output/dog-lora"

# Worker pool spec.
machine_type = "n1-standard-4"  # alternatives: "a2-highgpu-1g", "g2-standard-4", "a2-ultragpu-1g"
num_nodes = 1
gpu_type = "NVIDIA_TESLA_V100"  # alternatives: "NVIDIA_TESLA_A100", "NVIDIA_L4", "NVIDIA_A100_80GB"
num_gpus = 1

# Give the job a unique display name, then define a custom-container training job
# that runs the image at TRAIN_DOCKER_URI.
job_name = create_job_name(prefix="dreambooth-stable-diffusion")
job = aiplatform.CustomContainerTrainingJob(display_name=job_name, container_uri=TRAIN_DOCKER_URI)

# Pass training arguments and launch job.
# See https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora_sdxl.py
# for a full list of training arguments.
#
# Each list item reaches the container as one argv entry with no shell in between, so values
# must NOT carry shell-style inner quotes: "--seed='0'" would hand argparse the text '0'
# (quotes included), and likewise for the mixed-precision value and the prompt.
# NOTE(review): the instance prompt describes a person wearing a t-shirt while instance_dir
# holds the dog images - confirm the prompt matches the data you train on.
model = job.run(
    args=[
        "dreambooth/train_dreambooth_lora_sdxl.py",
        f"--pretrained_model_name_or_path={model_id}",
        # "--train_text_encoder",
        f"--instance_data_dir={instance_dir}",
        f"--output_dir={output_dir}",
        "--mixed_precision=fp16",
        "--instance_prompt=a person wearing a googlecloud42 t-shirt",
        "--resolution=1024",
        "--train_batch_size=2",
        "--gradient_accumulation_steps=2",
        "--gradient_checkpointing",
        "--learning_rate=1e-4",
        "--lr_scheduler=constant",
        "--lr_warmup_steps=0",
        "--max_train_steps=500",
        "--checkpointing_steps=717",
        "--enable_xformers_memory_efficient_attention",
        # "--use_8bit_adam",
        "--seed=0",
    ],
    replica_count=num_nodes,
    machine_type=machine_type,
    accelerator_type=gpu_type,
    accelerator_count=num_gpus,
    # sync=False returns immediately instead of blocking the notebook until the job finishes.
    sync=False,
)