# Dreambooth fine-tuning for Stable Diffusion using d🧨ffusers 

This notebook shows how to "teach" Stable Diffusion a new concept via Dreambooth using 🤗 Hugging Face [🧨 Diffusers library](https://github.com/huggingface/diffusers). 

![Dreambooth Example](https://dreambooth.github.io/DreamBooth_files/teaser_static.jpg)
_By using just 3-5 images you can teach new concepts to Stable Diffusion and personalize the model on your own images_ 

Unlike Textual Inversion, this approach trains the whole model, which can yield better results at the cost of a larger model.

For a general introduction to the Stable Diffusion model please refer to this [colab](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb).



Visit the Stable Diffusion Prompt Book:
https://openart.ai/promptbook

## Initial setup

In [None]:
#@title Install the required libs
# diffusers is installed from the GitHub repository rather than from a pinned release.
!pip install -U -qq git+https://github.com/huggingface/diffusers.git
# Training, logging, tokenization and demo dependencies.
!pip install -qq accelerate tensorboard transformers ftfy gradio
# ipywidgets is constrained to the 7.x series.
!pip install -qq "ipywidgets>=7,<8"
# bitsandbytes provides the 8-bit AdamW optimizer selected by `use_8bit_adam`.
!pip install -qq bitsandbytes
# datasets is used to download the instance and class images from the Hub.
!pip install -qq datasets
!pip install -qq safetensors
# wandb is used to log the configuration and sample generations.
!pip install -qq wandb

In [None]:
#@title [Optional] Install xformers for faster and memory efficient training
#@markdown Acknowledgement: The xformers wheel are taken from [TheLastBen/fast-stable-diffusion](https://github.com/TheLastBen/fast-stable-diffusion). Thanks a lot for building these wheels!
%%time

!pip install -U --pre triton

from subprocess import getoutput
from IPython.display import HTML
from IPython.display import clear_output
import time

s = getoutput('nvidia-smi')
if 'T4' in s:
  gpu = 'T4'
elif 'P100' in s:
  gpu = 'P100'
elif 'V100' in s:
  gpu = 'V100'
elif 'A100' in s:
  gpu = 'A100'

while True:
    try: 
        gpu=='T4'or gpu=='P100'or gpu=='V100'or gpu=='A100'
        break
    except:
        pass
    print('[1;31mit seems that your GPU is not supported at the moment')
    time.sleep(5)

if (gpu=='T4'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/T4/xformers-0.0.13.dev0-py3-none-any.whl
  
elif (gpu=='P100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/P100/xformers-0.0.13.dev0-py3-none-any.whl

elif (gpu=='V100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/V100/xformers-0.0.13.dev0-py3-none-any.whl

elif (gpu=='A100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/A100/xformers-0.0.13.dev0-py3-none-any.whl

In [None]:
#@title Import required libraries
import argparse
import itertools
import math
import os
import wandb
from contextlib import nullcontext
import random
import ipywidgets as widgets
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch.utils.data import Dataset

import PIL
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from diffusers.optimization import get_scheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from accelerate.utils import set_seed
from diffusers import DPMSolverMultistepScheduler
from huggingface_hub import notebook_login
from datasets import load_dataset
from pathlib import Path
from torchvision import transforms
import bitsandbytes as bnb
from argparse import Namespace
import gc

def image_grid(imgs, rows, cols):
    """Display every image and paste all of them into one ``rows`` x ``cols`` grid.

    Args:
        imgs: Sequence of PIL images holding exactly ``rows * cols`` items. The
            cell size of the grid is taken from the first image.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.

    Returns:
        A new RGB image with the inputs laid out row by row.

    Raises:
        AssertionError: If ``len(imgs) != rows * cols``.
    """
    assert len(imgs) == rows*cols

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))

    for i, img in enumerate(imgs):
        # Side effect: each image is also shown on its own, resized to 768x768.
        display(img.resize((768, 768)))
        row, col = divmod(i, cols)
        grid.paste(img, box=(col*w, row*h))
    return grid

## Settings for teaching your new concept

In [None]:
# Log in to the Hugging Face Hub before the datasets are downloaded in the cells below.
notebook_login()

In [None]:
# Default theme; replaced whenever another entry is picked in the dropdown below.
theme = "landscape"
drop_down = widgets.Dropdown(
    options=["animal", "science", "food", "landscape", "wildcard"],
    # Start the widget on the default theme so the UI and `theme` agree; otherwise the
    # dropdown shows its first option while `theme` holds a different value.
    value=theme,
    description="Pick a theme",
    disabled=False,
)


def dropdown_handler(change):
    """Store the newly selected dropdown value in the module-level ``theme``."""
    global theme
    theme = change.new


drop_down.observe(dropdown_handler, names="value")
display(drop_down)

In [None]:
# Echo the current value of `theme` (the default unless the dropdown above was changed).
print(f"You've selected the {theme} theme!")

In [None]:

dataset_id = "CCMat/db-aplaca"  # CHANGE THIS TO YOUR {hub_username}/{dataset_id}
dataset = load_dataset(dataset_id, split="train")
print(dataset)

class_dataset_id = "CCMat/llama" 
class_dataset = load_dataset(class_dataset_id, split="train")
print(class_dataset)

train_imgs = [img['image'] for img in dataset]
class_imgs = [img['image'] for img in class_dataset]


def _save_as_jpeg(images, folder):
    """Write ``images`` into ``folder`` as ``0.jpeg``, ``1.jpeg``, ..., creating the folder if needed."""
    os.makedirs(folder, exist_ok=True)
    for i, image in enumerate(images):
        # JPEG cannot store alpha or palette modes, so normalize to RGB before saving.
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(f"{folder}/{i}.jpeg")


# Instance images of the concept being taught.
save_path = "./my_concept"
_save_as_jpeg(train_imgs, save_path)

# Class images used for prior preservation.
prior_preservation_class_folder = "./class_images"
_save_as_jpeg(class_imgs, prior_preservation_class_folder)

In [None]:
# Preview the first five instance images; image_grid shows each one and the combined grid is displayed last.
display(image_grid(train_imgs[:5], 1, 5))

## Teach the model the new concept (fine-tuning with Dreambooth)
Execute this sequence of cells to run the training process. The whole process may take from 15 minutes to 2 hours. (Open this block if you are interested in how this process works under the hood or if you want to change advanced training settings or hyperparameters.)

In [None]:
#@title Setup the Classes

class DreamBoothDataset(Dataset):
    """Pairs instance images (and, optionally, class images) with their tokenized prompts.

    Every item is a dict with ``instance_images`` and ``instance_prompt_ids``; when a
    class folder was given it additionally holds ``class_images`` and
    ``class_prompt_ids``. Images are resized, cropped to ``size`` (center or random
    crop), converted to a tensor and normalized with mean 0.5 / std 0.5. Prompt ids are
    returned unpadded.
    """

    def __init__(
        self,
        instance_data_root,
        instance_prompt,
        tokenizer,
        class_data_root=None,
        class_prompt=None,
        size=768,
        center_crop=False,
    ):
        """Index the image folders and build the preprocessing pipeline.

        Args:
            instance_data_root: Folder containing the instance images.
            instance_prompt: Prompt paired with every instance image.
            tokenizer: Tokenizer used to encode both prompts.
            class_data_root: Optional folder of class images; created if missing.
            class_prompt: Prompt paired with every class image.
            size: Target side length passed to the resize and crop transforms.
            center_crop: Use a center crop when True, a random crop otherwise.

        Raises:
            ValueError: If ``instance_data_root`` does not exist.
        """
        self.size = size
        self.center_crop = center_crop
        self.tokenizer = tokenizer

        self.instance_data_root = Path(instance_data_root)
        if not self.instance_data_root.exists():
            raise ValueError("Instance images root doesn't exists.")

        self.instance_images_path = self._list_files(self.instance_data_root)
        self.num_instance_images = len(self.instance_images_path)
        self.instance_prompt = instance_prompt
        self._length = self.num_instance_images

        if class_data_root is not None:
            self.class_data_root = Path(class_data_root)
            self.class_data_root.mkdir(parents=True, exist_ok=True)
            self.class_images_path = self._list_files(self.class_data_root)
            self.num_class_images = len(self.class_images_path)
            # The longer of the two image lists defines the dataset length; the shorter
            # one is cycled through via the modulo in __getitem__.
            self._length = max(self.num_class_images, self.num_instance_images)
            self.class_prompt = class_prompt
        else:
            self.class_data_root = None

        self.image_transforms = transforms.Compose(
            [
                transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
                transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size),
                transforms.ToTensor(),
                transforms.Normalize([0.5], [0.5]),
            ]
        )

    @staticmethod
    def _list_files(root):
        """Return the regular files directly inside ``root``.

        Sub-directories (for example ``.ipynb_checkpoints``) are skipped because they
        cannot be opened as images.
        """
        return [entry for entry in root.iterdir() if entry.is_file()]

    def _load_image(self, path):
        """Open ``path``, convert it to RGB if needed and apply the image transforms."""
        image = Image.open(path)
        if not image.mode == "RGB":
            image = image.convert("RGB")
        return self.image_transforms(image)

    def _tokenize(self, prompt):
        """Encode ``prompt`` into unpadded input ids, truncated to the tokenizer's maximum length."""
        return self.tokenizer(
            prompt,
            padding="do_not_pad",
            truncation=True,
            max_length=self.tokenizer.model_max_length,
        ).input_ids

    def __len__(self):
        return self._length

    def __getitem__(self, index):
        """Return the preprocessed example for ``index`` (indices wrap around each image list)."""
        example = {}
        example["instance_images"] = self._load_image(
            self.instance_images_path[index % self.num_instance_images]
        )
        example["instance_prompt_ids"] = self._tokenize(self.instance_prompt)

        if self.class_data_root:
            example["class_images"] = self._load_image(
                self.class_images_path[index % self.num_class_images]
            )
            example["class_prompt_ids"] = self._tokenize(self.class_prompt)

        return example


class PromptDataset(Dataset):
    """Dataset that repeats one prompt ``num_samples`` times, tagging each item with its index."""

    def __init__(self, prompt, num_samples):
        self.num_samples = num_samples
        self.prompt = prompt

    def __len__(self):
        return self.num_samples

    def __getitem__(self, index):
        # Every item carries the same prompt; only the index differs.
        return {"prompt": self.prompt, "index": index}

In [None]:
# --- Concept naming and prompts ---
name_of_your_concept = "fluffalpaca"
type_of_thing = "llama"
instance_prompt = f"a photo of {name_of_your_concept} {type_of_thing}"
class_prompt = f"a photo of {type_of_thing}"
print(f"Instance prompt: {instance_prompt}")
print(f"class prompt: {class_prompt}")

# --- Prior preservation ---
prior_preservation = True
prior_preservation_class_prompt = class_prompt
prior_preservation_class_folder = "./class_images"
class_data_root = prior_preservation_class_folder
class_prompt = prior_preservation_class_prompt
num_class_images = 52
sample_batch_size = 2
prior_loss_weight = 0.7

# --- Training schedule ---
learning_rate = 1e-06
max_train_steps = 1100
save_images_steps = 22

# --- Base checkpoint that gets fine-tuned ---
pretrained_model = "stabilityai/stable-diffusion-2"

In [None]:
#@title Generate Class Images
# Top up the class-image folder with generated samples until it holds `num_class_images` files.
if prior_preservation:
    class_images_dir = Path(class_data_root)
    if not class_images_dir.exists():
        class_images_dir.mkdir(parents=True)
    cur_class_images = sum(1 for _ in class_images_dir.iterdir())

    if cur_class_images < num_class_images:
        pipeline = StableDiffusionPipeline.from_pretrained(
            pretrained_model, revision="fp16", torch_dtype=torch.float16
        ).to("cuda")
        pipeline.enable_attention_slicing()
        pipeline.set_progress_bar_config(disable=True)

        num_new_images = num_class_images - cur_class_images
        print(f"Number of class images to sample: {num_new_images}.")

        sample_dataset = PromptDataset(class_prompt, num_new_images)
        sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=sample_batch_size)

        for example in tqdm(sample_dataloader, desc="Generating class images"):
            images = pipeline(example["prompt"]).images

            # File names continue after the number of files already in the folder.
            for i, image in enumerate(images):
                file_index = example['index'][i] + cur_class_images
                image.save(class_images_dir / f"{file_index}.jpg")

        # Drop the pipeline reference, collect garbage, then release cached CUDA memory.
        pipeline = None
        gc.collect()
        del pipeline
        with torch.no_grad():
            torch.cuda.empty_cache()

In [None]:
#@title Load the Stable Diffusion model
# Load models and create wrapper for stable diffusion
# Each component is read from its own subfolder of the pretrained checkpoint.
text_encoder = CLIPTextModel.from_pretrained(pretrained_model, subfolder="text_encoder")
vae = AutoencoderKL.from_pretrained(pretrained_model, subfolder="vae")
unet = UNet2DConditionModel.from_pretrained(pretrained_model, subfolder="unet")
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model, subfolder="tokenizer")

# Handed to the sampling pipeline that is built during training.
feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")

In [None]:
#@title Setting up all training args
args = Namespace(
    # --- Data and prompts ---
    dataset_id=dataset_id,
    instance_data_dir=save_path,
    instance_prompt=instance_prompt,
    resolution=vae.sample_size,
    center_crop=True,

    # --- Model ---
    pretrained_model=pretrained_model,
    train_text_encoder=False,

    # --- Prior preservation ---
    with_prior_preservation=prior_preservation,
    prior_loss_weight=prior_loss_weight,
    class_data_dir=prior_preservation_class_folder,
    class_prompt=prior_preservation_class_prompt,
    num_class_images=num_class_images,
    sample_batch_size=2,

    # --- Optimization ---
    learning_rate=learning_rate,
    lr_scheduler="constant",
    lr_warmup_steps=100,
    max_train_steps=max_train_steps,
    train_batch_size=1,
    gradient_accumulation_steps=1,
    max_grad_norm=1.0,
    mixed_precision="no",  # set to "fp16" for mixed-precision training.
    gradient_checkpointing=True,  # set this to True to lower the memory usage.
    use_8bit_adam=True,  # use 8bit optimizer from bitsandbytes
    seed=3434554,

    # --- Checkpointing and logging ---
    save_steps=1012,
    save_images_steps=save_images_steps,
    output_dir="dreambooth_lama",
    logging_dir="logs",
    report_to="wandb",
)

In [None]:
def wandb_cfg(args):
    """Build the configuration dictionary that is logged to wandb.

    Args:
        args: Namespace holding the training settings read below.

    Returns:
        A dict of the selected settings. The prior-preservation entries are included
        only when ``args.with_prior_preservation`` is truthy.
    """
    config_dict = {
        "dataset_name": args.dataset_id,
        "seed": args.seed,
        "resolution": args.resolution,
        "train_batch_size": args.train_batch_size,
        "pretrained_model": args.pretrained_model,
        "gradient_accumulation_steps": args.gradient_accumulation_steps,
        "learning_rate": args.learning_rate,
        "lr_scheduler": args.lr_scheduler,

        ## dreambooth
        "max_train_steps": args.max_train_steps,
        "instance_prompt": args.instance_prompt,
        # Key written without the stray trailing space it used to carry.
        "8bit_adam": args.use_8bit_adam,
        "max_grad_norm": args.max_grad_norm,
        "gradient_checkpointing": args.gradient_checkpointing,
    }

    if args.with_prior_preservation:
        config_dict["prior_loss_weight"] = args.prior_loss_weight
        config_dict["class_data_dir"] = args.class_data_dir
        config_dict["class_prompt"] = args.class_prompt

    return config_dict


def set_wandb_project_run(args):
    """Start a wandb run for the project named ``args.output_dir`` and return it.

    The run configuration comes from ``wandb_cfg(args)`` and is printed as well.
    """
    run_config = wandb_cfg(args)
    run = wandb.init(project=args.output_dir, config=run_config, reinit=True)
    # Sanity check: the returned run must be the module-level active run.
    assert run is wandb.run
    print(run_config)

    return run

In [None]:
#@title Training function
def training_function(text_encoder, vae, unet):
    """Run DreamBooth fine-tuning of ``unet`` (optionally also ``text_encoder``) and save the result.

    Hyper-parameters come from the module-level ``args`` namespace; the module-level
    ``tokenizer``, ``feature_extractor``, ``name_of_your_concept`` and ``type_of_thing``
    are read as well. The VAE is always frozen; the text encoder is frozen unless
    ``args.train_text_encoder`` is set.

    Side effects: starts a wandb run, periodically generates and logs sample images,
    writes checkpoints below ``args.output_dir`` and finally saves the whole pipeline
    to ``args.output_dir``.

    Args:
        text_encoder: Text encoder used to embed the tokenized prompts.
        vae: Autoencoder used to encode images into latents.
        unet: Denoising UNet that is fine-tuned.

    Raises:
        ValueError: When text-encoder training is combined with gradient accumulation
            on several processes, or when the noise scheduler reports an unknown
            prediction type.
    """
    logger = get_logger(__name__)

    set_seed(args.seed)

    # NOTE(review): no `log_with` is passed here, so `accelerator.log` may be a no-op
    # and only the direct `wandb.log` call below reaches wandb -- confirm.
    accelerator = Accelerator(
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        mixed_precision=args.mixed_precision,
    )

    # Currently, it's not possible to do gradient accumulation when training two models with accelerate.accumulate
    # This will be enabled soon in accelerate. For now, we don't allow gradient accumulation when training two models.
    # TODO (patil-suraj): Remove this check when gradient accumulation with two models is enabled in accelerate.
    if args.train_text_encoder and args.gradient_accumulation_steps > 1 and accelerator.num_processes > 1:
        raise ValueError(
            "Gradient accumulation is not supported when training the text encoder in distributed training. "
            "Please set gradient_accumulation_steps to 1. This feature will be supported in the future."
        )

    vae.requires_grad_(False)
    if not args.train_text_encoder:
        text_encoder.requires_grad_(False)

    if args.gradient_checkpointing:
        unet.enable_gradient_checkpointing()
        if args.train_text_encoder:
            text_encoder.gradient_checkpointing_enable()

    # Use 8-bit Adam for lower memory usage or to fine-tune the model in 16GB GPUs
    if args.use_8bit_adam:
        optimizer_class = bnb.optim.AdamW8bit
    else:
        optimizer_class = torch.optim.AdamW

    params_to_optimize = (
        itertools.chain(unet.parameters(), text_encoder.parameters()) if args.train_text_encoder else unet.parameters()
    )

    optimizer = optimizer_class(
        params_to_optimize,
        lr=args.learning_rate,
    )

    # Loaded with `from_pretrained`, like the sampling scheduler further down.
    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model, subfolder="scheduler")

    train_dataset = DreamBoothDataset(
        instance_data_root=args.instance_data_dir,
        instance_prompt=args.instance_prompt,
        class_data_root=args.class_data_dir if args.with_prior_preservation else None,
        class_prompt=args.class_prompt,
        tokenizer=tokenizer,
        size=args.resolution,
        center_crop=args.center_crop,
    )

    def collate_fn(examples):
        """Stack images and pad prompt ids; class examples are appended after the instance ones."""
        input_ids = [example["instance_prompt_ids"] for example in examples]
        pixel_values = [example["instance_images"] for example in examples]

        # concat class and instance examples for prior preservation
        if args.with_prior_preservation:
            input_ids += [example["class_prompt_ids"] for example in examples]
            pixel_values += [example["class_images"] for example in examples]

        pixel_values = torch.stack(pixel_values)
        pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()

        input_ids = tokenizer.pad(
            {"input_ids": input_ids},
            padding="max_length",
            return_tensors="pt",
            max_length=tokenizer.model_max_length
        ).input_ids

        batch = {
            "input_ids": input_ids,
            "pixel_values": pixel_values,
        }
        return batch

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch_size, shuffle=True, collate_fn=collate_fn
    )

    lr_scheduler = get_scheduler(
        args.lr_scheduler,
        optimizer=optimizer,
        num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
        num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
    )

    if args.train_text_encoder:
        unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
            unet, text_encoder, optimizer, train_dataloader, lr_scheduler
        )
    else:
        unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
            unet, optimizer, train_dataloader, lr_scheduler
        )

    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16

    # Move text_encoder and vae to gpu.
    # For mixed precision training we cast the text_encoder and vae weights to half-precision
    # as these models are only used for inference, keeping weights in full precision is not required.
    vae.to(accelerator.device, dtype=weight_dtype)
    if not args.train_text_encoder:
        text_encoder.to(accelerator.device, dtype=weight_dtype)

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    run = set_wandb_project_run(args)
    accelerator.init_trackers(project_name=args.output_dir, config=wandb_cfg(args))

    # Train!
    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    print("***** Running training *****")
    print(f"  Pretrained model = {args.pretrained_model}")
    print(f"  Num examples = {len(train_dataset)}")
    print(f"  Num batches each epoch = {len(train_dataloader)}")
    print(f"  Num update steps per epoch = {num_update_steps_per_epoch}")
    print(f"  Instantaneous batch size per device = {args.train_batch_size}")
    print(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    print(f"  Learning rate = {args.learning_rate}")
    print(f"  Total optimization steps = {args.max_train_steps}")
    print(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    print(f"  With Prior Preservation = {args.with_prior_preservation}")
    print(f"  Instance prompt = {args.instance_prompt}")
    # Only show the progress bar once on each machine.
    global_step = 0

    progress_bar = tqdm(range(args.max_train_steps), disable=not accelerator.is_local_main_process)
    progress_bar.set_description("Steps")

    for epoch in range(num_train_epochs):
        unet.train()
        for step, batch in enumerate(train_dataloader):

            with accelerator.accumulate(unet):
                # Convert images to latent space
                latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample()
                latents = latents * 0.18215

                # Sample noise that we'll add to the latents
                noise = torch.randn_like(latents)
                bsz = latents.shape[0]
                # Sample a random timestep for each image
                timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
                timesteps = timesteps.long()

                # Add noise to the latents according to the noise magnitude at each timestep
                # (this is the forward diffusion process)
                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                # Get the text embedding for conditioning
                encoder_hidden_states = text_encoder(batch["input_ids"])[0]

                # Predict the noise residual
                noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

                # Get the target for loss depending on the prediction type
                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
                else:
                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")

                if args.with_prior_preservation:
                    # Chunk the noise and noise_pred into two parts and compute the loss on each part separately.
                    # (collate_fn puts the instance examples first and the class examples second.)
                    noise_pred, noise_pred_prior = torch.chunk(noise_pred, 2, dim=0)
                    target, target_prior = torch.chunk(target, 2, dim=0)

                    # Compute instance loss
                    loss = F.mse_loss(noise_pred.float(), target.float(), reduction="none").mean([1, 2, 3]).mean()

                    # Compute prior loss
                    prior_loss = F.mse_loss(noise_pred_prior.float(), target_prior.float(), reduction="mean")

                    # Add the prior loss to the instance loss.
                    loss = loss + args.prior_loss_weight * prior_loss
                else:
                    loss = F.mse_loss(noise_pred.float(), target.float(), reduction="mean")

                accelerator.backward(loss)

                if accelerator.sync_gradients:
                    # Clip every parameter that is being optimized, i.e. the text encoder
                    # too when it is trained.
                    params_to_clip = (
                        itertools.chain(unet.parameters(), text_encoder.parameters())
                        if args.train_text_encoder
                        else unet.parameters()
                    )
                    accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm)
                optimizer.step()
                optimizer.zero_grad()

            # Checks if the accelerator has performed an optimization step behind the scenes
            if accelerator.sync_gradients:
                progress_bar.update(1)
                global_step += 1

                if global_step % args.save_steps == 0:
                    if accelerator.is_main_process:
                        pipeline = StableDiffusionPipeline.from_pretrained(
                            args.pretrained_model,
                            unet=accelerator.unwrap_model(unet),
                            text_encoder=accelerator.unwrap_model(text_encoder),
                        )
                        checkpoint_dir = os.path.join(args.output_dir, f"checkpoint-{global_step}")
                        pipeline.save_pretrained(checkpoint_dir)

                if accelerator.is_main_process:
                    # Sample generations start at the hard-coded step 391, then repeat
                    # every `save_images_steps` steps.
                    if global_step % args.save_images_steps == 0 and global_step >= 391:
                        print(global_step)
                        pipeline = StableDiffusionPipeline(
                            text_encoder=accelerator.unwrap_model(text_encoder),
                            unet=accelerator.unwrap_model(unet),
                            tokenizer=tokenizer,
                            scheduler=DPMSolverMultistepScheduler.from_pretrained(args.pretrained_model, subfolder="scheduler"),
                            vae=vae,
                            requires_safety_checker=False,
                            safety_checker=StableDiffusionSafetyChecker.from_pretrained(
                                "CompVis/stable-diffusion-safety-checker"
                            ),
                            feature_extractor=feature_extractor,
                        ).to("cuda")
                        # Replace the safety checker with a pass-through.
                        pipeline.safety_checker = lambda images, clip_input: (images, False)
                        num_cols = 2
                        all_images = []
                        prompt_lst = [
                            f"{args.instance_prompt} in front of the Colosseum in Rome, ",
                            f"{args.instance_prompt} walking on the moon",
                            f"Portrait photo of {name_of_your_concept} {type_of_thing} with the Eiffel Tower in the background",
                        ]
                        for prompt in prompt_lst:
                            for _ in range(num_cols):
                                images = pipeline(prompt, guidance_scale=9).images
                                all_images.extend(images)
                        img_grid = image_grid(all_images, len(prompt_lst), num_cols)
                        accelerator.log({'Sample generations': wandb.Image(img_grid)})
                        wandb.log({'Sample generations': wandb.Image(img_grid)}, step=global_step)

                        # Hard-coded extra snapshot of the sampling pipeline at step 1012.
                        if global_step == 1012:
                            pipeline.save_pretrained(f"{args.output_dir}/{global_step}")

            logs = {"loss": loss.detach().item()}
            progress_bar.set_postfix(**logs)
            accelerator.log(logs, step=global_step)

            # Stop as soon as the requested number of optimization steps is reached.
            if global_step >= args.max_train_steps:
                break

        accelerator.wait_for_everyone()

    # Create the pipeline using the trained modules and save it.
    if accelerator.is_main_process:
        pipeline = StableDiffusionPipeline.from_pretrained(
            args.pretrained_model,
            unet=accelerator.unwrap_model(unet),
            text_encoder=accelerator.unwrap_model(text_encoder),
        )
        pipeline.save_pretrained(args.output_dir)

In [None]:
# Release cached CUDA memory before training starts.
torch.cuda.empty_cache()

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

In [None]:
#@title Run training
import accelerate
accelerate.notebook_launcher(training_function, args=(text_encoder, vae, unet))

# Drop the gradients left over from training to free some memory ...
for param in itertools.chain(unet.parameters(), text_encoder.parameters()):
  if param.grad is not None:
    del param.grad  # free some memory
# ... then release the cached CUDA memory once, after the loop instead of once per parameter.
torch.cuda.empty_cache()

In [None]:
# Load the freshly trained pipeline; the scheduler config is read from the same output
# folder so the cell does not depend on an external Hub repository.
pipe = StableDiffusionPipeline.from_pretrained(
    args.output_dir,
    scheduler=DPMSolverMultistepScheduler.from_pretrained(args.output_dir, subfolder="scheduler"),
    torch_dtype=torch.float16,
).to("cuda")
prompt_lst = [
    f"{name_of_your_concept} {type_of_thing} in Paris",
    f"{name_of_your_concept} {type_of_thing} dancing with Elvis Presley",
    f"{instance_prompt} in front of the Colosseum in Rome, ",
    f"{instance_prompt} walking on the moon",
    f"Portrait photo of {name_of_your_concept} {type_of_thing} with the Eiffel Tower in the background",
]
num_cols = 4
# NOTE: `prompt` keeps the last entry after the loop; the model-card cell reads it.
for prompt in prompt_lst:
  all_images = []
  for _ in range(num_cols):
      images = pipe(prompt, guidance_scale=11).images
      all_images.extend(images)
  # image_grid displays each image itself; the returned grid is not needed here.
  image_grid(all_images, 1, num_cols)

In [None]:
# Repository name used on the Hub, built from the concept name and its type.
model_name = f"{name_of_your_concept}-{type_of_thing}"
# Free-text summary of the theme and base model; it is embedded in the model card
# under "Description" when the model is pushed to the Hub.
description = f"""
This is a Stable Diffusion model fine-tuned on `{type_of_thing}` images for the {theme} theme.
Pretrained Model: {pretrained_model}

"""
print(description)
print(model_name)

In [None]:
# Log in again before uploading; the import is repeated so this cell also works on its own.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
from huggingface_hub import HfApi, ModelCard, create_repo, get_full_repo_name

# Set up repo and upload files
hub_model_id = get_full_repo_name(model_name)
# exist_ok lets this cell be re-run after the repository has already been created.
create_repo(hub_model_id, exist_ok=True)
api = HfApi()
api.upload_folder(folder_path=args.output_dir, path_in_repo="", repo_id=hub_model_id)

# Model card: YAML metadata followed by the markdown body.
# NOTE: `prompt` is whatever value the last executed cell left in that variable
# (the final sample prompt when the notebook is run top to bottom).
content = f"""
---
license: creativeml-openrail-m
tags:
- pytorch
- diffusers
- stable-diffusion
- text-to-image
- diffusion-models-class
- dreambooth-hackathon
- {theme}
widget:
- text: {prompt}
---

# DreamBooth model for the {name_of_your_concept} concept trained on the {dataset_id} dataset.

This is a Stable Diffusion model fine-tuned on the {name_of_your_concept} concept with DreamBooth. It can be used by modifying the `instance_prompt`: **{instance_prompt}**

This model was created as part of the DreamBooth Hackathon 🔥. Visit the [organisation page](https://huggingface.co/dreambooth-hackathon) for instructions on how to take part!

## Description

{description}

## Usage

```python
from diffusers import StableDiffusionPipeline

pipeline = StableDiffusionPipeline.from_pretrained('{hub_model_id}')
image = pipeline('{instance_prompt}').images[0]
image
```
"""

card = ModelCard(content)
hub_url = card.push_to_hub(hub_model_id)
print(f"Upload successful! Model can be found here: {hub_url}")
print(
    f"View your submission on the public leaderboard here: https://huggingface.co/spaces/dreambooth-hackathon/leaderboard"
)

In [None]:
#@title Run the Stable Diffusion pipeline with interactive UI Demo on Gradio
#@markdown Run this cell to get an interactive demo where you can run the model using Gradio

#@markdown ![](https://i.imgur.com/2ACLWu2.png)
import gradio as gr

def inference(prompt, num_samples):
    """Generate ``num_samples`` images for ``prompt`` with the module-level ``pipe``.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_samples: Number of images to generate; converted to ``int`` because the
            slider value may arrive as a float.

    Returns:
        A list with the generated images.
    """
    images = pipe(prompt, num_images_per_prompt=int(num_samples), num_inference_steps=25).images
    return list(images)

# The widgets get their own names so they do not overwrite the notebook-level
# `prompt` and `run` variables used by other cells.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(label="prompt")
            # At least one image, whole numbers only.
            samples_slider = gr.Slider(label="Samples", minimum=1, step=1, value=1)
            run_button = gr.Button(value="Run")
        with gr.Column():
            gallery = gr.Gallery(show_label=False)

    run_button.click(inference, inputs=[prompt_box, samples_slider], outputs=gallery)
    # One value per input component: [prompt, samples].
    gr.Examples([["a photo of sks toy riding a bicycle", 1]], [prompt_box, samples_slider], gallery, inference, cache_examples=False)


demo.launch()


In [None]:
model_lst = ['CCMat/fluffalpaca-llama-1012', 'CCMat/fluffalpaca-llama', 'CCMat/fluffalpaca-llama-v2', 'CCMat/fluffalpaca-llama-1100', 'CCMat/fluffalpaca-llama-1078']
pret_model_lst = ['stabilityai/stable-diffusion-2', 'stabilityai/stable-diffusion-2', 'stabilityai/stable-diffusion-2', 'stabilityai/stable-diffusion-2', 'stabilityai/stable-diffusion-2']
lr_lst = [1e-6, 1e-6, 1e-6, 1e-6, 1e-6]
step_lst = [1012, 1034, 1056, 1078, 1100]

# Tune the guidance to control how closely the generations follow the prompt.
# Values between 7-11 usually work best
guidance_scale = 11

# One table row per (model, prompt): seven metadata columns plus `num_cols` image columns.
num_cols = 3
img_ls = [f"im_{i}" for i in range(num_cols)]
cols = ['pretrained_model', 'lr', 'n_step', 'concept', 'type_of_thing', 'prompt', 'guidance_scale']
cols.extend(img_ls)
# The project name is passed by keyword so it is not bound to another positional parameter.
wandb.init(project='dreambooth_lama', reinit=True)
tbl = wandb.Table(columns=cols)
print('ok')

# The prompts are the same for every model, so they are built once.
prompt_list = [
  f"{name_of_your_concept} {type_of_thing} in front of the Eiffel Tower, in the 1970s, vivid colors",
  f"{name_of_your_concept} {type_of_thing} in space by Enki Bilal",
  f"wall graffiti art of {name_of_your_concept} {type_of_thing} dancing with Elvis Presley",
  f"{instance_prompt} in front of the Colosseum in Rome, professional photograph",
  f"{instance_prompt} walking on the moon, vaporwave style, , trending on artstation",
  f"{instance_prompt} fighting against Darth Vader, purple neon lighting unreal engine 5",
  f"Portrait painting of a {name_of_your_concept} {type_of_thing} as a marvel superhero, ultra realistic, highly detailed, concept art",
  f"{instance_prompt} swimming in the river",
  f"a photo of an astronaut riding a {name_of_your_concept} {type_of_thing} on mars",
  f"USSR propoganda poster. Long live the {name_of_your_concept} {type_of_thing}",
]

for model_id, pret_model, lr, step in zip(model_lst, pret_model_lst, lr_lst, step_lst):
  print(model_id)
  pipe = StableDiffusionPipeline.from_pretrained(
      model_id ,
      torch_dtype=torch.float16,
  ).to("cuda")

  for prompt in prompt_list:
    all_images = []
    for _ in range(num_cols):
        images = pipe(prompt, guidance_scale=guidance_scale).images
        all_images.extend(images)
    image_grid(all_images, 1, num_cols)
    # Exactly one wandb.Image per generated sample, matching the `im_*` columns.
    tbl.add_data(pret_model, lr, step, name_of_your_concept, type_of_thing, prompt, guidance_scale,
                 *[wandb.Image(img) for img in all_images])

  with torch.no_grad():
    torch.cuda.empty_cache()

wandb.log({'Sample generations': tbl})
wandb.finish()