# Dreambooth Stable Diffusion - ☃️🎄XMas 2022 Edition🎄☃️
This Colab is based on Shivam Shrirao's repository and has been modified to use revision hash 47f456ea3dd3c6ba3f5cc1bcc0f69e79c787208b of that repository from 2022-12-25.

https://github.com/yushan777/dbsd-xmas-edition

https://github.com/ShivamShrirao/diffusers/tree/main/examples/dreambooth

https://arxiv.org/pdf/2208.12242.pdf

In [None]:
#@title 1. Check type of GPU and VRAM available.
# Prints one CSV row per GPU: name, total memory, free memory (no header row).
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

Tesla T4, 15360 MiB, 15101 MiB


In [None]:
# uninstall pytorch to clean things up a bit
%pip uninstall torch torchtext torchaudio torchvision --y

In [None]:
#@title 2. Install Requirements

# install diffusers from the ShivamShrirao's repo, revision : 47f456ea3dd3c6ba3f5cc1bcc0f69e79c787208b (25th Dec 2022)
%pip install git+https://github.com/ShivamShrirao/diffusers.git@47f456ea3dd3c6ba3f5cc1bcc0f69e79c787208b

# get train_dreambooth.py from revision 47f456ea3dd3c6ba3f5cc1bcc0f69e79c787208b
!wget https://github.com/ShivamShrirao/diffusers/raw/47f456ea3dd3c6ba3f5cc1bcc0f69e79c787208b/examples/dreambooth/train_dreambooth.py

# for scripts we want the latest ones so that safetensors are supported
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_original_stable_diffusion_to_diffusers.py

# install requisite packages
%pip install torch==1.13.0+cu116 torchvision==0.14.0+cu116 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu116
%pip install -q -U --pre triton==2.0.0.dev20221030
%pip install -q accelerate==0.12.0 transformers==4.23.1 ftfy bitsandbytes==0.35.0 gradio natsort safetensors
#%pip install xformers==0.0.13
%pip install -q https://github.com/yushan777/xformers-wheels/releases/download/xformers-0.015.dev0-py38/xformers-0.0.15.dev0-cp38-cp38-linux_x86_64.whl
#%pip install -q https://github.com/brian6091/xformers-wheels/releases/download/0.0.15.dev0%2B4c06c79/xformers-0.0.15.dev0+4c06c79.d20221205-cp38-cp38-linux_x86_64.whl


In [None]:
#@title 3. Token, Class, Prompt
#@markdown Enter Token and Class words.  If empty, defaults will be used.

# TOKEN is a unique identifier linked to the subject that you are training
TOKEN_WORD = "zwx" #@param {type:"string"}
# Strip stray whitespace so it cannot leak into prompts or directory names;
# an empty or whitespace-only value falls back to the default.
TOKEN_WORD = TOKEN_WORD.strip() or "zwx"

# CLASS is a coarse class descriptor of the subject (e.g. person, man, woman, cat, dog, watch, etc.).
CLASS_WORD = "person" #@param {type:"string"}
CLASS_WORD = CLASS_WORD.strip() or "person"

# INSTANCE_PROMPT is TOKEN_WORD + CLASS_WORD, e.g. "zwx person".
# When generating images, embed it in a longer prompt, for example:
# "photo of zwx person"
# "painting of zwx person"
INSTANCE_PROMPT = f'{TOKEN_WORD} {CLASS_WORD}'

# SAMPLE_PROMPT is the prompt intended for the preview samples saved during training.
SAMPLE_PROMPT = f'a photo of {TOKEN_WORD} {CLASS_WORD}'
#@markdown INSTANCE_PROMPT will be token + class, i.e. "zwx person" \
#@markdown _This will be used when you generate your images._

#@markdown SAMPLE_PROMPT will be "a photo of" + token + class, i.e. "a photo of zwx person" \
#@markdown _This is intended only for the sample images produced during training._

In [None]:
#@title 4. Google Drive, Model Paths & Directory Settings
import os
from google.colab import drive
from os import path

google_drive_dir = '/content/drive'

#@markdown Mount Google Drive
mount_google_drive = True #@param {type:"boolean"}

#@markdown Save trained models directly in google drive? (will override above setting)
save_models_to_gdrive = False #@param {type:"boolean"}

# Saving to Drive needs the mount even when "Mount Google Drive" is unticked,
# so either option triggers it. The mount point is always the google_drive_dir
# variable (the original second branch mounted the literal string 'google_drive_dir').
if mount_google_drive or save_models_to_gdrive:
  if not path.exists(google_drive_dir):
    drive.mount(google_drive_dir)
    print(f'Google Drive mounted to {google_drive_dir}')
  else:
    print(f'Google Drive already mounted at {google_drive_dir}')

#@markdown Name/Path of the initial model. (this can be a HuggingFace repo address or a local path)
MODEL_NAME = "runwayml/stable-diffusion-v1-5" #@param {type:"string"}

#@markdown Enter the directory name to save model in. Leave empty for default. \
#@markdown _Default will be `stable_diffusion_weights/{TOKEN_WORD}`_
OUTPUT_DIR = "stable_diffusion_weights/zwx" #@param {type:"string"}
OUTPUT_DIR = OUTPUT_DIR.strip()
if len(OUTPUT_DIR) == 0:
  OUTPUT_DIR = f'stable_diffusion_weights/{TOKEN_WORD}'

# Anchor the directory under Drive's MyDrive or under /content. A value that
# already sits under the chosen root is kept as-is, so the root is never
# prepended twice.
output_root = f'{google_drive_dir}/MyDrive/' if save_models_to_gdrive else '/content/'
if not OUTPUT_DIR.startswith(output_root):
  OUTPUT_DIR = output_root + OUTPUT_DIR.lstrip('/')

print(f"[*] Weights will be saved at {OUTPUT_DIR}")

# os.makedirs instead of `!mkdir -p $OUTPUT_DIR`: the unquoted shell form
# breaks on paths containing spaces.
os.makedirs(OUTPUT_DIR, exist_ok=True)


In [None]:
#@title 5. Instance and Class Directory Paths
# Run this cell first, then copy your instance (training) images into INSTANCE_DIR.
#@markdown #### Specify dir for instance images or leave blank for default. \
#@markdown _Default will be /<area>content/training_images/{TOKEN_WORD}._ \
INSTANCE_DIR = '/content/training_images/zwx' #@param {type:"string"}

# Blank -> per-token default; anything not already under /content/ is placed there.
if not INSTANCE_DIR:
  INSTANCE_DIR = f'/content/training_images/{TOKEN_WORD}'
elif not INSTANCE_DIR.startswith('/content/'):
  INSTANCE_DIR = '/content/' + INSTANCE_DIR

# Run this cell first, then copy your class (regularization) images into CLASS_DIR.
# Keeping them ready in Google Drive makes things faster.

#@markdown #### Specify dir for class images (can be prexisting directory) or leave blank for default.
#@markdown _Default will be /<area>content/class_images/{CLASS_WORD}._ \
#@markdown _If no class images are found then they will be created during training (slower)._ \
#@markdown _If existing class images are found then they will be used. (faster)._
CLASS_DIR =  '/content/drive/MyDrive/class_images/SD1-5/person-ddim' #@param {type:"string"}
# Same rule as INSTANCE_DIR: blank -> per-class default, otherwise force under /content/.
if not CLASS_DIR:
  CLASS_DIR = f'/content/class_images/{CLASS_WORD}'
elif not CLASS_DIR.startswith('/content/'):
  CLASS_DIR = '/content/' + CLASS_DIR


In [None]:
#@title 6. Concepts List.
import json
import os

# The variables set in the earlier cells are only a convenience for beginners;
# literal strings work just as well, as the commented-out concepts below show.
#
# Several concepts can be trained at once by adding more entries.
# Try tweaking `--max_train_steps` accordingly the more concepts you have.
first_concept = {
    "instance_prompt":      INSTANCE_PROMPT,
    "class_prompt":         CLASS_WORD,
    "instance_data_dir":    INSTANCE_DIR,
    "class_data_dir":       CLASS_DIR,
}

concepts_list = [
    first_concept,
#    {
#        "instance_prompt":      "zwx person",
#        "class_prompt":         "person",
#        "instance_data_dir":    "/content/training_images/zwx",
#        "class_data_dir":       "/content/drive/MyDrive/class_images/SD1-5/person-ddim"
#    },
#     {
#         "instance_prompt":      "ukj dog",
#         "class_prompt":         "dog",
#         "instance_data_dir":    "/content/training_images/ukj",
#         "class_data_dir":       "/content/drive/MyDrive/class_images/SD1-5/dog-ddim"
#     }
]

# Make sure every concept has a directory ready to receive its training images.
for concept in concepts_list:
    os.makedirs(concept["instance_data_dir"], exist_ok=True)

# Write the list to concepts_list.json in the current working directory.
with open("concepts_list.json", "w") as concepts_file:
    concepts_file.write(json.dumps(concepts_list, indent=4))

In [None]:
#@title 7. Upload Your Training Images 🌌🌄🏞️

#@markdown You can use the file manager on the left panel to upload \
#@markdown (drag and drop) your instance images to INSTANCE_DIR defined in `CELL 5` 

### Training Parameter Combinations

Use the table below to choose the best flags based on your memory and speed requirements. Tested on Tesla T4 GPU.


| `fp16` | `train_batch_size` | `gradient_accumulation_steps` | `gradient_checkpointing` | `use_8bit_adam` | GB VRAM usage | Speed (it/s) |
| ---- | ------------------ | ----------------------------- | ----------------------- | --------------- | ---------- | ------------ |
| fp16 | 1                  | 1                             | TRUE                    | TRUE            | 9.92       | 0.93         |
| no   | 1                  | 1                             | TRUE                    | TRUE            | 10.08      | 0.42         |
| fp16 | 2                  | 1                             | TRUE                    | TRUE            | 10.4       | 0.66         |
| fp16 | 1                  | 1                             | FALSE                   | TRUE            | 11.17      | 1.14         |
| no   | 1                  | 1                             | FALSE                   | TRUE            | 11.17      | 0.49         |
| fp16 | 1                  | 2                             | TRUE                    | TRUE            | 11.56      | 1            |
| fp16 | 2                  | 1                             | FALSE                   | TRUE            | 13.67      | 0.82         |
| fp16 | 1                  | 2                             | FALSE                   | TRUE            | 13.7       | 0.83          |
| fp16 | 1                  | 1                             | TRUE                    | FALSE           | 15.79      | 0.77         |


Add `--gradient_checkpointing` flag for around 9.92 GB VRAM usage.

Remove the `--use_8bit_adam` flag for full precision. This requires 15.79 GB with `--gradient_checkpointing`, otherwise 17.8 GB.

Remove the `--train_text_encoder` flag to reduce memory usage further, at the cost of degraded output quality.

In [None]:
#@title 8. Training!
!accelerate launch train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
  --output_dir=$OUTPUT_DIR \
  --revision="fp16" \
  --with_prior_preservation --prior_loss_weight=1.0 \
  --seed=1337 \
  --resolution=512 \
  --train_batch_size=1 \
  --train_text_encoder \
  --mixed_precision="fp16" \
  --use_8bit_adam \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --num_class_images=50 \
  --sample_batch_size=4 \
  --max_train_steps=2000 \
  --save_interval=10000 \
  --save_sample_prompt="a photo of zwx person" \
  --concepts_list="concepts_list.json"

# Reduce the `--save_interval` to lower than `--max_train_steps` to save weights from intermediate steps.
# `--save_sample_prompt` can be same as `--instance_prompt` to generate intermediate samples (saved along with weights in samples directory).

In [None]:
#@markdown Specify the weights directory to use (leave blank for latest, but you still need to run the cell)
WEIGHTS_DIR = "" #@param {type:"string"}
if WEIGHTS_DIR == "":
    from natsort import natsorted
    from glob import glob
    import os
    # Natural sort puts step "1000" after "800", so the last entry is the latest save.
    saved_weights = natsorted(glob(OUTPUT_DIR + os.sep + "*"))
    if not saved_weights:
        # Fail with an actionable message instead of a bare IndexError.
        raise FileNotFoundError(
            f"No saved weights found in {OUTPUT_DIR}. "
            "Run the training cell first, or set WEIGHTS_DIR manually."
        )
    WEIGHTS_DIR = saved_weights[-1]
print(f"[*] WEIGHTS_DIR={WEIGHTS_DIR}")

[*] WEIGHTS_DIR=/content/stable_diffusion_weights/zwx/800


In [None]:
#@markdown Run to generate a grid of preview images from all saved weights (one row per saved step).
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

weights_folder = OUTPUT_DIR

# Keep only numeric step folders that contain a "samples" directory.
# Step "0" is skipped, as before. Anything else in OUTPUT_DIR is ignored
# instead of crashing int().
folders = sorted(
    [
        f for f in os.listdir(weights_folder)
        if f.isdigit() and f != "0"
        and os.path.isdir(os.path.join(weights_folder, f, "samples"))
    ],
    key=int,
)
if not folders:
    raise FileNotFoundError(
        f"No saved steps with a samples folder found in {weights_folder}. Run the training cell first."
    )

# Sorted file names give a stable left-to-right order across rows and re-runs.
samples_by_folder = {
    f: sorted(os.listdir(os.path.join(weights_folder, f, "samples"))) for f in folders
}

row = len(folders)
# Size the grid for the widest row so a step with more samples cannot index past it.
col = max(len(names) for names in samples_by_folder.values())
if col == 0:
    raise FileNotFoundError(f"The samples folders under {weights_folder} contain no images.")

scale = 4
# squeeze=False always returns a 2-D array of axes, so axes[i, j] is valid
# for a single row, a single column, or a 1x1 grid.
fig, axes = plt.subplots(
    row, col,
    figsize=(col*scale, row*scale),
    gridspec_kw={'hspace': 0, 'wspace': 0},
    squeeze=False,
)

# Hide every axis up front so cells without an image stay blank.
for blank_axes in axes.flat:
    blank_axes.axis('off')

for i, folder in enumerate(folders):
    image_folder = os.path.join(weights_folder, folder, "samples")
    for j, image in enumerate(samples_by_folder[folder]):
        currAxes = axes[i, j]
        if i == 0:
            currAxes.set_title(f"Image {j}")
        if j == 0:
            # Label each row with its step number, just left of the first image.
            currAxes.text(-0.1, 0.5, folder, rotation=0, va='center', ha='center', transform=currAxes.transAxes)
        image_path = os.path.join(image_folder, image)
        img = mpimg.imread(image_path)
        currAxes.imshow(img, cmap='gray')

plt.tight_layout()
plt.savefig('grid.png', dpi=72)

## Convert weights to ckpt to use in web UIs like AUTOMATIC1111.

In [None]:
#@markdown Run conversion.
ckpt_path = WEIGHTS_DIR + "/model.ckpt"

half_arg = ""
#@markdown  Whether to convert to fp16, takes half the space (2GB).
fp16 = True #@param {type: "boolean"}
if fp16:
    half_arg = "--half"
!python convert_diffusers_to_original_stable_diffusion.py --model_path $WEIGHTS_DIR  --checkpoint_path $ckpt_path $half_arg
print(f"[*] Converted ckpt saved at {ckpt_path}")

## Inference

In [None]:
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display

# To use a previously trained model saved in gdrive, replace this with the
# full path of that model in gdrive.
model_path = WEIGHTS_DIR

# Load the pipeline in half precision with the safety checker disabled,
# then move it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    model_path,
    safety_checker=None,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")

# Swap in a DDIM scheduler built from the loaded scheduler's config.
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()

# No generator yet; the inference cells pass this value as `generator`.
g_cuda = None

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 0 files to the new cache system


0it [00:00, ?it/s]

In [None]:
#@markdown Can set random seed here for reproducibility.
seed = 52362 #@param {type:"number"}
# manual_seed returns the generator itself, so creation and seeding can be chained.
g_cuda = torch.Generator(device='cuda').manual_seed(seed)
g_cuda

<torch._C.Generator at 0x7f446c5b4dd0>

In [None]:
#@title Run for generating images.

prompt = "photo of zwx person in a bucket" #@param {type:"string"}
negative_prompt = "" #@param {type:"string"}
num_samples = 4 #@param {type:"number"}
guidance_scale = 7.5 #@param {type:"number"}
num_inference_steps = 24 #@param {type:"number"}
height = 512 #@param {type:"number"}
width = 512 #@param {type:"number"}

# Collect the form values into the keyword arguments the pipeline call takes.
generation_kwargs = dict(
    height=height,
    width=width,
    negative_prompt=negative_prompt,
    num_images_per_prompt=num_samples,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=g_cuda,
)

# Run under CUDA autocast with inference mode enabled.
with autocast("cuda"):
    with torch.inference_mode():
        images = pipe(prompt, **generation_kwargs).images

# Show each generated image in the cell output.
for img in images:
    display(img)

In [None]:
#@markdown Run Gradio UI for generating images.
import gradio as gr

def inference(prompt, negative_prompt, num_samples, height=512, width=512, num_inference_steps=50, guidance_scale=7.5):
    """Generate images with the loaded pipeline for the Gradio UI.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        num_samples: Number of images to generate for the prompt.
        height: Image height in pixels.
        width: Image width in pixels.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance scale, passed through unchanged.

    Returns:
        The `.images` attribute of the pipeline result.
    """
    # The count/size/step values are cast to int before reaching the pipeline
    # (presumably because gr.Number can deliver floats).
    with torch.autocast("cuda"), torch.inference_mode():
        return pipe(
                prompt, height=int(height), width=int(width),
                negative_prompt=negative_prompt,
                num_images_per_prompt=int(num_samples),
                num_inference_steps=int(num_inference_steps), guidance_scale=guidance_scale,
                generator=g_cuda
            ).images

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Default prompt matches the "zwx person" concept used by the rest
            # of this notebook (it previously said "zwx dog").
            prompt = gr.Textbox(label="Prompt", value="photo of zwx person in a bucket")
            negative_prompt = gr.Textbox(label="Negative Prompt", value="")
            run = gr.Button(value="Generate")
            with gr.Row():
                num_samples = gr.Number(label="Number of Samples", value=4)
                guidance_scale = gr.Number(label="Guidance Scale", value=7.5)
            with gr.Row():
                height = gr.Number(label="Height", value=512)
                width = gr.Number(label="Width", value=512)
            # Explicit bounds: at least one whole step, so the slider cannot
            # submit 0 or a fractional step count.
            num_inference_steps = gr.Slider(label="Steps", value=24, minimum=1, maximum=100, step=1)
        with gr.Column():
            gallery = gr.Gallery()

    # Input order must match the positional parameters of inference().
    run.click(inference, inputs=[prompt, negative_prompt, num_samples, height, width, num_inference_steps, guidance_scale], outputs=gallery)

demo.launch(debug=True)

In [None]:
#@title (Optional) Delete diffuser and old weights and only keep the ckpt to free up drive space.

#@markdown [ ! ] Caution, Only execute if you are sure u want to delete the diffuser format weights and only use the ckpt.
import shutil
from glob import glob
import os

# Compare resolved paths rather than raw strings: a trailing slash or a
# relative form of WEIGHTS_DIR would otherwise fail the comparison and the
# weights we mean to keep would be deleted.
keep_dir = os.path.realpath(WEIGHTS_DIR)

# 1) Remove every other saved-weights directory under OUTPUT_DIR.
for f in glob(OUTPUT_DIR+os.sep+"*"):
    if os.path.realpath(f) == keep_dir:
        continue
    # Only real directories are removed. Plain files and symlinks are left
    # alone; passing them to rmtree would raise.
    if os.path.isdir(f) and not os.path.islink(f):
        shutil.rmtree(f)
        print("Deleted", f)

# 2) Inside WEIGHTS_DIR, remove the diffusers sub-directories and keep the files.
for f in glob(WEIGHTS_DIR+"/*"):
    # Anything named *.ckpt or *.json is kept. The original test
    # `not ckpt or not json` was always true and protected nothing.
    if f.endswith((".ckpt", ".json")):
        continue
    if os.path.isdir(f) and not os.path.islink(f):
        shutil.rmtree(f)
        print("Deleted", f)

In [None]:
#@title Free runtime memory
# Ends the running Python session. Every variable defined in the notebook is lost,
# so the earlier cells must be re-run before anything else can be used again.
exit()