<a href="https://colab.research.google.com/github/plundh/pl-dreambooth/blob/main/pl_ShivamDreamBooth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is based on [Shivam Shrirao's repo](https://github.com/ShivamShrirao/diffusers/tree/main/examples/dreambooth), but with a modified notebook.


*On your Google Drive:*
1. Put your training images in ***dreambooth/training_images/[training_folder_name]***
2. Put your class images in ***dreambooth/class_images/[class_folder_name]***


In [None]:
#@title Check Runtime GPU
# Print the attached GPU's name, total memory and free memory as a single CSV line
# (no header row). The command fails if no NVIDIA GPU/driver is available to the runtime.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

In [None]:
#@title #1. Install Requirements

import os
from google.colab import drive

# Mount Google Drive if not already mounted
gdrive_path = "/content/google_drive"
if os.path.isdir(gdrive_path):
  print(f"Google Drive  already mounted at '{gdrive_path}'" )
else:
  drive.mount(gdrive_path)

!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
%pip install -qq git+https://github.com/ShivamShrirao/diffusers
%pip install -q -U --pre triton
%pip install -q accelerate==0.12.0 transformers ftfy bitsandbytes gradio natsort
%pip install -q https://github.com/metrolobo/xformers_wheels/releases/download/1d31a3ac_various_6/xformers-0.0.14.dev0-cp37-cp37m-linux_x86_64.whl
# These were compiled on Tesla T4, should also work on P100, thanks to https://github.com/metrolobo

# If precompiled wheels don't work, install it with the following command. It will take around 40 minutes to compile.
# %pip install git+https://github.com/facebookresearch/xformers@1d31a3a#egg=xformers

#@markdown You have to: \\
#@markdown * Be a registered user in Hugging Face Hub and intput your [access token](https://huggingface.co/settings/tokens) here.
#@markdown * Accept the model license before downloading or using the Stable Diffusion weights. Visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox to agree.  

!mkdir -p ~/.huggingface
huggingface_token = "" #@param {type:"string"}
!echo -n "{huggingface_token}" > ~/.huggingface/token

print("Done!")

In [None]:
#@title #2. Training Data and Settings

import math
import fnmatch
import json
from collections import Counter
from prettytable import PrettyTable
from prettytable import SINGLE_BORDER
from IPython.display import Markdown as md

#@markdown Suggested settings:
#@markdown * **10-30** training images
#@markdown * **80-120** steps per training image

#@markdown \

#@markdown **Training images -**
#@markdown _/content/google_drive/MyDrive/dreambooth/training_images/[training_folder_name]_ \\
# Sub-folder of training_images/ that holds the instance images for this concept.
training_folder_name = "azorn" #@param {type:"string"}
# Used verbatim as the concept's instance prompt.
concept_name = "AndersZorn" #@param {type:"string"}

#@markdown If you wish to train multiple concepts, add additional dictionaries to **concepts_input** in the code. \\


#@markdown \

#@markdown **Class images -**
#@markdown _/content/google_drive/MyDrive/dreambooth/class_images/[class_folder_name]_
# Toggles class-image (prior preservation) handling, both when the concepts are
# assembled in this cell and when the training command is launched.
enable_prior_preservation = False #@param {type:"boolean"}
# Sub-folder of class_images/ holding the class images; only read when prior preservation is enabled.
class_folder_name = "style_ddimm" #@param {type:"string"}

# The class prompt only applies with prior preservation; otherwise it is left empty.
if enable_prior_preservation:
  class_name = "artstyle" #@param {type:"string"}
else:
  class_name = ""

#@markdown If no folder is provided, class images will be generated prior to training.

training_images_root = gdrive_path + "/MyDrive/dreambooth/training_images/"
class_images_root = gdrive_path + "/MyDrive/dreambooth/class_images/"

def _count_files(folder):
  """Return how many regular files sit directly inside `folder`.

  A missing folder counts as 0 instead of raising, so the summary further down
  in this cell can report "no images found" rather than crashing here.
  Sub-folders (for example a stray `.ipynb_checkpoints`) are not counted.
  """
  if not os.path.isdir(folder):
    return 0
  return sum(1 for entry in os.scandir(folder) if entry.is_file())

# Rebuilt from scratch on every run so re-running the cell never duplicates concepts.
concepts_list = []

# One dictionary per concept. To train a second concept, uncomment the lines below
# (the commented "}," / "{" pair turns the single entry into two).
concepts_input = [
    {
        "concept_name":                   concept_name,
        "class_name":                     class_name,
        "training_folder_name":           training_folder_name,
        "class_folder_name":              class_folder_name
#    },
#    {
#        "concept_name":                  "JohnSingerSargent",
#        "class_name":                    "artstyle",
#        "training_folder_name":          "jsargent",
#        "class_folder_name":             "style_ddim"
    }
]

for concept in concepts_input:
  instance_dir = f"{training_images_root}{concept['training_folder_name']}"
  concept_values = {
      "instance_prompt":      concept["concept_name"],
      "instance_data_dir":    f"{instance_dir}/",
      "inst_file_count":      _count_files(instance_dir),
  }

  # Class entries are only attached when prior preservation is on; they follow the
  # instance entries so the key order of each concept stays the same as before.
  if enable_prior_preservation:
    class_dir = f"{class_images_root}{concept['class_folder_name']}"
    concept_values.update({
        "class_prompt":         concept["class_name"],
        "class_data_dir":       f"{class_dir}/",
        "class_file_count":     _count_files(class_dir),
    })

  concepts_list.append(concept_values)

if not enable_prior_preservation: print("☑️ Skipping Prior Preservation")

#print(json.dumps(concepts_list, sort_keys=True, indent=2)) # For debugging
#print("\n")
if len(concepts_list) == 1:
    print("Training 1 concept")
else:
    print(f"Training {len(concepts_list)} concepts")

def _concept_label(entry):
  """Build '(<prompt>[_<class prompt>]@<training folder>_<image count>i)' for one concept.

  The class suffix is taken from the concept itself, so every concept in a
  multi-concept run is labelled with its own class prompt.
  """
  class_suffix = f"_{entry['class_prompt']}" if enable_prior_preservation else ''
  training_folder = os.path.basename(os.path.normpath(entry['instance_data_dir']))
  return f"({entry['instance_prompt']}{class_suffix}@{training_folder}_{entry['inst_file_count']}i)"

# These names are read by later cells (output file names, training flags, final message).
# They describe the whole list, so they are computed once rather than on every loop pass.
combined_token = ",".join([entry['instance_prompt'] for entry in concepts_list])
combined_token_class_folder = ",".join([_concept_label(entry) for entry in concepts_list])

if enable_prior_preservation:
  # Sum the *class* image counts (this previously summed the training image counts).
  # NOTE(review): the value is passed to train_dreambooth.py as --num_class_images; with
  # several concepts, confirm whether that script treats it as a total or as a per-concept number.
  total_class_images = sum([entry['class_file_count'] for entry in concepts_list])

for concept in concepts_list:
  print("\n")
  if concept['inst_file_count'] == 0:
    print(f"❌ No training images found in '{concept['instance_data_dir']}'")
  else:
    print(f"✅ {concept['inst_file_count']} training images found in '{concept['instance_data_dir']}'")

  if enable_prior_preservation:
    if concept['class_file_count'] == 0:
      print(f"❌ No class images found in '{concept['class_data_dir']}'")
    else:
      print(f"✅ {concept['class_file_count']} class images found in '{concept['class_data_dir']}'")

  # Two-column key/value table showing exactly what is stored for this concept.
  concept_table = PrettyTable(header=False)
  concept_table.align = "l"
  concept_table.set_style(SINGLE_BORDER)

  for key, value in concept.items():
    concept_table.add_row([key, value])
  print(concept_table)

# Persist the concept definitions to the working directory; the training cell passes
# this file to train_dreambooth.py through --concepts_list.
with open("concepts_list.json", "w") as concepts_file:
    concepts_file.write(json.dumps(concepts_list, indent=4))

#if enable_prior_preservation:
#  for concept in concepts_list:
#    os.makedirs("concept['instance_prompt']", exist_ok=True)

#@markdown \

#@markdown **Training Settings**
# Passed to train_dreambooth.py as --max_train_steps and embedded in the output file names.
training_steps = 5000 #@param {type:"integer"}
# Passed to train_dreambooth.py as --save_interval; also the step size of the checkpoint table printed by this cell.
save_interval = 1000 #@param {type:"integer"}
# Passed to train_dreambooth.py as --pretrained_model_name_or_path.
pretrained_model_name = "runwayml/stable-diffusion-v1-5" #@param ["runwayml/stable-diffusion-v1-5", "compvis/stable-diffusion-v1-4"]

## Output paths
date_string = !date +"%Y-%m-%d_%H-%M"

def output_file_path(training_steps):
  return f"{gdrive_path}/MyDrive/dreambooth/models/{date_string[-1]}_{combined_token_class_folder}_{training_steps}s_pl-shivam.ckpt"
temp_folder = f"{date_string[-1]}_{combined_token_class_folder}_{training_steps}s"
temp_folder = temp_folder.replace(" ", "-")
temp_folder_root = f"stable_diffusion_weights/{temp_folder}"

!mkdir -p temp_folder
!mkdir -p "{gdrive_path}/MyDrive/dreambooth/models"

total_training_images = sum([concept["inst_file_count"] for concept in concepts_list])

# Table of the step counts listed for this run and how many steps each one gives per training image.
checkpoints_table = PrettyTable(['Step', 'Steps per image'])
checkpoints_table.set_style(SINGLE_BORDER)
checkpoints_table.sortby = "Step"

print("\n")
if total_training_images == 0:
  # Without images the per-image figure would divide by zero, and training cannot work anyway.
  print("❌ No training images found - add images to the training folder and re-run this cell.")
elif save_interval <= 0:
  # A non-positive interval would never count down to zero.
  print(f"❌ save_interval must be a positive number of steps (got {save_interval}).")
else:
  # Count down from the final step in save_interval strides, stopping before 0.
  for checkpoint_steps in range(training_steps, 0, -save_interval):
    steps_per_img = checkpoint_steps // total_training_images
    checkpoints_table.add_row([checkpoint_steps, steps_per_img])

  print(f"Saving checkpoints every {save_interval} steps:")
  print(checkpoints_table)
  print("\n")
  print("Ready for training ...")

In [None]:
#@title #3. Training

if enable_prior_preservation:
  !accelerate launch train_dreambooth.py \
    --pretrained_model_name_or_path="{pretrained_model_name}" \
    --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
    --output_dir="{temp_folder_root}" \
    --with_prior_preservation \
    --prior_loss_weight=1.0 \
    --num_class_images="{total_class_images}"  \
    --seed=1337 \
    --resolution=512 \
    --train_batch_size=1 \
    --train_text_encoder \
    --mixed_precision="fp16" \
    --use_8bit_adam \
    --gradient_accumulation_steps=1 \
    --learning_rate=1e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps="{training_steps}" \
    --save_interval="{save_interval}" \
    --concepts_list="concepts_list.json"

else:
  !accelerate launch train_dreambooth.py \
    --pretrained_model_name_or_path="{pretrained_model_name}" \
    --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
    --output_dir="{temp_folder_root}" \
    --seed=1337 \
    --resolution=512 \
    --train_batch_size=1 \
    --train_text_encoder \
    --mixed_precision="fp16" \
    --use_8bit_adam \
    --gradient_accumulation_steps=1 \
    --learning_rate=1e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps="{training_steps}" \
    --save_interval="{save_interval}" \
    --concepts_list="concepts_list.json"

# Reduce the `--save_interval` to lower than `--max_train_steps` to save weights from intermediate steps.
# `--save_sample_prompt` can be same as `--instance_prompt` to generate intermediate samples (saved along with weights in samples directory).

print("Done!")

In [None]:
#@title #4. Export Checkpoint files to Google Drive
from google.colab import runtime
from natsort import natsorted
from glob import glob
import os

#@markdown  File name legend: **[date]** \_( **[token]** @ **[training_image_folder]** \_ **[image_count]** i)_ **[steps]** s_pl-shivam.ckpt \\
export_only_last_checkpoint = False #@param {type:"boolean"}
disconnect_on_completion = False #@param {type:"boolean"}

# "--half" when fp16 is set, otherwise an empty string.
fp16 = True
half_arg = "--half" if fp16 else ""

# Show the Drive path that the final checkpoint will be written to.
print(output_file_path(training_steps))

def convert_diffusers(checkpoint_folder_name, steps_count):
  try:
    !python convert_diffusers_to_original_stable_diffusion.py --model_path "{checkpoint_folder_name}" --checkpoint_path "{output_file_path(steps_count)}" --half
    print(f"✅ Saved model '{output_file_path(steps_count)}'")
  except:
    print(f"❌ Could not save model '{output_file_path(training_steps)}'")

# Checkpoint folders are named after their step count. Anything else found in the
# output folder is ignored, so it can neither break the int() conversion below nor
# be mistaken for the final checkpoint.
checkpoint_folders = [
    folder
    for folder in natsorted(glob(f"{temp_folder_root}/*/"))
    if os.path.basename(os.path.normpath(folder)).isdigit()
]

if not export_only_last_checkpoint:
  for folder in checkpoint_folders:
    folder_checkpoint = int(os.path.basename(os.path.normpath(folder)))
    # Step 0 is skipped, and the final step is exported separately below.
    if folder_checkpoint not in [0, training_steps]:
      convert_diffusers(folder, folder_checkpoint)

print(training_steps)
print(output_file_path(training_steps))

# The highest-numbered folder is exported under the final step count.
if checkpoint_folders:
  convert_diffusers(checkpoint_folders[-1], training_steps)
else:
  print(f"❌ No checkpoint folders found in '{temp_folder_root}' - run the training cell first.")


# Pick the string shown to the user: the label that includes class and folder when
# prior preservation was used, otherwise the plain comma-joined instance prompts.
# NOTE(review): the label is also the file-name component; confirm it is really the
# text users should type in a prompt when prior preservation is enabled.
invocation_token = combined_token_class_folder if enable_prior_preservation else combined_token
print(f"To invoke your trained subject, use '{invocation_token}' in the prompt.")

# Optionally hand the Colab runtime back once everything above has finished.
if disconnect_on_completion:
  runtime.unassign()