# DiffFit-pytorch
<a href="https://colab.research.google.com/github/mkshing/difffit-pytorch/blob/main/scripts/difffit_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is an implementation of [DiffFit: Unlocking Transferability of Large Diffusion Models via Simple Parameter-Efficient Fine-Tuning](https://arxiv.org/abs/2304.06648) using d🧨ffusers.

- My summary tweet: https://twitter.com/mk1stats/status/1647246562765012993
- Code: https://github.com/mkshing/DiffFit-pytorch


## **Setup**

In [None]:
!nvidia-smi
!git clone https://mkshing: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@github.com/mkshing/difffit-pytorch.git
!pip install -r difffit-pytorch/requirements.txt
!pip install -q -U --pre triton
!pip install -q ftfy bitsandbytes==0.35.0 gradio natsort xformers

In [None]:
# @markdown **(Optional) Login wandb**<br> If you don't use wandb for logging, make sure to remove `--report_to="wandb"`
# Only needed when the training command passes `--report_to="wandb"`; in the
# training cell of this notebook that flag is commented out by default.
!wandb login

## **Training DiffFit**

This example uses the usual 5 dog images, which you can download from [here](https://drive.google.com/drive/folders/1BO_dyz-p65qhBRRMRA4TbZ8qW4rB99JZ).

In [None]:
#@title **Dataset**
import datetime
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image


def display_pic(folder, pattern="*.jpg", cols=5):
    """Show every image in ``folder`` that matches ``pattern`` as a captioned grid.

    Args:
        folder: Directory that contains the images.
        pattern: Glob pattern, relative to ``folder``, selecting the files to show.
        cols: Number of images per row.

    Returns:
        None. The figure is rendered with ``plt.show()`` and then closed. When no
        file matches, a message is printed and no figure is created.
    """
    files = sorted(glob.glob(os.path.join(folder, pattern)))
    if not files:
        print(f"No images matching {pattern!r} found in {folder}")
        return

    # Size the grid from the number of files instead of a fixed 10x5 layout,
    # which made add_subplot fail as soon as there were more than 50 images.
    rows = (len(files) + cols - 1) // cols
    # 6x6 inches per cell, the same cell size the fixed (30, 60) 10x5 figure used.
    fig = plt.figure(figsize=(6 * cols, 6 * rows))
    for i, file in enumerate(files):
        # The context manager releases the file handle; np.asarray copies the
        # pixel data out before the image is closed.
        with Image.open(file) as img:
            pixels = np.asarray(img)
        ax = fig.add_subplot(rows, cols, i + 1, xticks=[], yticks=[])
        ax.imshow(pixels)
        ax.set_xlabel(os.path.basename(file), fontsize=30)
    # Lay the figure out once, after all axes exist, rather than on every iteration.
    fig.tight_layout()
    plt.show()
    plt.close(fig)


# Form fields (Colab `#@param`): where the training images live and where results go.
mount_google_drive = True #@param {type:"boolean"}
INSTANCE_DATA_DIR = "/content/drive/MyDrive/AI/dreambooth-dog/data" #@param {type: 'string'}
CLASS_DATA_DIR = "/content/drive/MyDrive/AI/dreambooth-dog/class-data" #@param {type: 'string'}
OUTPUT_DIR = "/content/DiffFitOutput" #@param {type: 'string'}

OUTPUT_DIR = os.path.abspath(OUTPUT_DIR)

# The form field is a string, so an unset value is "" rather than None; test for
# emptiness so the fallback directory is actually used.
if not CLASS_DATA_DIR:
    CLASS_DATA_DIR = os.path.join(OUTPUT_DIR, "class_data_dir")

force_remount = False
if mount_google_drive:
    from google.colab import drive # type: ignore
    drive_path = "/content/drive"
    try:
        drive.mount(drive_path, force_remount=force_remount)
    except Exception as err:
        # Best effort: keep the notebook running, but report the real cause
        # instead of swallowing it (a bare `except:` also traps KeyboardInterrupt).
        print(f"...error mounting drive at {drive_path}: {err!r}")
        print("...paths under /content/drive will not be available")

os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"INSTANCE_DATA_DIR: {INSTANCE_DATA_DIR}")
print(f"CLASS_DATA_DIR: {CLASS_DATA_DIR}")
print(f"OUTPUT_DIR: {OUTPUT_DIR}")


In [None]:
# @title **Parameters:**
# Base model: passed to the training command as --pretrained_model_name_or_path
# and reused when the inference pipeline is loaded.
MODEL_NAME = "runwayml/stable-diffusion-v1-5" # @param {type: "string"}
# 200 is the number nitrosocke recommends; passed as --num_class_images.
NUM_CLASS_IMAGES = 200 #@param {type: "integer"}
# Passed as --max_train_steps.
MAX_TRAIN_STEPS = 500 #@param {type: "integer"}
# Passed as --checkpointing_steps.
CHECKPOINTING_STEPS = 100 #@param {type: "integer"}

In [None]:
from accelerate.utils import write_basic_config
# Runs before `accelerate launch` is used in the training cell.
# NOTE(review): presumably this writes a default accelerate config so that no
# interactive `accelerate config` step is needed — confirm against the accelerate docs.
write_basic_config()

In [None]:
# @title **Train:**
! accelerate launch difffit-pytorch/train_difffit.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --instance_data_dir=$INSTANCE_DATA_DIR \
  --class_data_dir=$CLASS_DATA_DIR \
  --output_dir=$OUTPUT_DIR \
  --instance_prompt="photo of a sks dog" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=5e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --num_class_images=$NUM_CLASS_IMAGES \
  --checkpointing_steps=$CHECKPOINTING_STEPS \
  --max_train_steps=$MAX_TRAIN_STEPS \
  --use_8bit_adam \
  --seed=42 \
  --enable_xformers_memory_efficient_attention \
  --gradient_checkpointing \
  --add_vlb_loss \
  --vlb_lambda=0.001 \
  # --with_prior_preservation --prior_loss_weight=1.0 \
  # --class_prompt="photo of a dog" \
  # --train_text_encoder \
  # --report_to="wandb" \
  # --bitfit \
  # --revision="fp16" \
  # --mixed_precision="fp16" \



### **Inference:**

In [None]:
#@markdown **helper functions**
import os
import sys
import io
import requests
import PIL
import torch
from torch import autocast
import huggingface_hub
from transformers import CLIPTextModel
from diffusers import (
    LMSDiscreteScheduler, 
    DDIMScheduler, 
    PNDMScheduler,
    DPMSolverMultistepScheduler, 
    EulerDiscreteScheduler, 
    EulerAncestralDiscreteScheduler,
    StableDiffusionPipeline
)
from PIL import Image
sys.path.append("/content/difffit-pytorch")
from difffit_pytorch.utils import load_unet_for_difffit, load_text_encoder_for_difffit, load_config_for_difffit


# Maps each `scheduler_type` form choice to the diffusers scheduler class that is
# instantiated (via `.from_pretrained`) when the inference pipeline is loaded.
SCHEDULER_MAPPING = {
    "ddim": DDIMScheduler,
    "plms": PNDMScheduler,
    "lms": LMSDiscreteScheduler,
    "euler": EulerDiscreteScheduler,
    "euler_ancestral": EulerAncestralDiscreteScheduler,
    "dpm_solver++": DPMSolverMultistepScheduler,
}



def image_grid(imgs, rows, cols):
    """Paste equally sized images into a single ``rows`` x ``cols`` sheet.

    Args:
        imgs: Sequence of PIL images, placed left-to-right, top-to-bottom.
            The tile size is taken from the first image.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of the first image.

    Raises:
        ValueError: If ``imgs`` is empty or ``len(imgs) != rows * cols``.
    """
    # Explicit checks instead of `assert`: asserts vanish under `python -O`, and an
    # empty list would otherwise surface as an opaque IndexError on `imgs[0]`.
    if len(imgs) == 0:
        raise ValueError("image_grid() needs at least one image")
    if len(imgs) != rows * cols:
        raise ValueError(
            f"expected rows * cols = {rows * cols} images, got {len(imgs)}"
        )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid


In [None]:
# @markdown **Load model:**
import sys
from diffusers import AutoencoderKL

efficient_weights_ckpt = "/content/DiffFitOutput/checkpoint-400" #@param {type:"string"}
scheduler_type = "dpm_solver++" #@param ["ddim", "plms", "lms", "euler", "euler_ancestral", "dpm_solver++"]

# Fall back to the CPU when no CUDA device is visible.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# The config stored with the checkpoint supplies the `bitfit` flag that both
# DiffFit loaders below receive.
training_args = load_config_for_difffit(efficient_weights_ckpt)
difffit_kwargs = {
    "efficient_weights_ckpt": efficient_weights_ckpt,
    "is_bitfit": training_args["bitfit"],
}
unet = load_unet_for_difffit(MODEL_NAME, subfolder="unet", **difffit_kwargs)
text_encoder = load_text_encoder_for_difffit(MODEL_NAME, subfolder="text_encoder", **difffit_kwargs)

# Remaining components: a separately published VAE and the scheduler selected in the form.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
scheduler_cls = SCHEDULER_MAPPING[scheduler_type]
scheduler = scheduler_cls.from_pretrained(MODEL_NAME, subfolder="scheduler")

# Assemble the pipeline around the base model (safety checker disabled) and move it to the device.
pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_NAME,
    unet=unet,
    text_encoder=text_encoder,
    vae=vae,
    scheduler=scheduler,
    safety_checker=None,
    feature_extractor=None,
    requires_safety_checker=False,
    torch_dtype=torch.float16,
).to(device)
print("loaded pipeline")

In [None]:
# @markdown **Run!:**
# @markdown <br> *It takes time at the 1st run because SVD is performed. 
import random
from tqdm import tqdm

prompt = "A picture of a sks dog in a bucket" #@param {type:"string"}
num_images_per_prompt = 2 # @param {type: "integer"}
guidance_scale = 7.5 # @param {type: "number"}
num_inference_steps = 25 # @param {type: "integer"}
height = 512 # @param {type: "integer"}
width = 512 # @param {type: "integer"}
seed = "random_seed" #@param {type:"string"}

# "random_seed" draws a fresh seed; any other value is parsed as an integer.
# The seed actually used is printed so a result can be reproduced.
if seed == "random_seed":
  random.seed()
  seed = random.randint(0, 2**32)
else:
  seed = int(seed)
# Create the generator on the same device the pipeline was moved to, instead of
# hard-coding 'cuda', which fails on a runtime without a GPU.
g_cuda = torch.Generator(device=device).manual_seed(seed)
print(f"seed: {seed}")

# "::" separates several prompts; each one becomes a row of the final grid.
prompts = prompt.split("::")
all_images = []
# A separate loop name keeps the `prompt` form value intact after the loop.
for sub_prompt in tqdm(prompts):
    with torch.autocast(device), torch.inference_mode():
        images = pipe(
            sub_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            num_images_per_prompt=num_images_per_prompt,
            height=height,
            width=width,
            generator=g_cuda
        ).images
    all_images.extend(images)
grid_image = image_grid(all_images, len(prompts), num_images_per_prompt)
grid_image

### **(Optional) Upload to the Hugging Face Hub**

In [None]:
# @markdown login huggingface hub
from huggingface_hub import login
# Called with no arguments — NOTE(review): presumably this prompts interactively
# for an access token; confirm against the huggingface_hub docs.
login()

In [None]:
from huggingface_hub import create_repo, upload_folder


hub_model_id = "" #@param {type: "string"}
hub_token = "" #@param {type: "string"}
folder_path = "/content/DiffFitOutput" #@param {type: "string"}

# Fail early with a readable message instead of an error from the Hub client.
if not hub_model_id:
    raise ValueError("Set `hub_model_id` (e.g. 'username/my-difffit-model') before running this cell.")
# An empty form field means "no explicit token"; None is passed on in that case.
if hub_token == "":
  hub_token = None
repo_id = create_repo(repo_id=hub_model_id, exist_ok=True, token=hub_token).repo_id

base_model = MODEL_NAME
instance_prompt = "photo of a sks dog"  #@param {type: "string"}
# @markdown paste your `instance_prompt` here.

# Model-card front matter followed by a short description; both are written to
# README.md inside the folder that gets uploaded.
yaml = f"""
---
license: creativeml-openrail-m
base_model: {base_model}
instance_prompt: {instance_prompt}
tags:
- stable-diffusion
- stable-diffusion-diffusers
- text-to-image
- diffusers
- difffit
inference: true
---
"""
model_card = f"""
# DiffFit - {repo_id}
These are DiffFit weights for {base_model}. The weights were trained on {instance_prompt}."""
# Explicit encoding so the README does not depend on the platform default.
with open(os.path.join(folder_path, "README.md"), "w", encoding="utf-8") as f:
    f.write(yaml + model_card)

# Pass the same token that created the repo; previously an explicit `hub_token`
# was ignored for the upload itself.
upload_folder(
    repo_id=repo_id,
    folder_path=folder_path,
    commit_message="first commit",
    ignore_patterns=["step_*", "epoch_*"],
    token=hub_token,
)

