## Create VENV

In [None]:
# Build an isolated Python environment in ./.controlnet and activate it
# in the current shell session.
VENV_DIR=".controlnet"
python -m venv "$VENV_DIR"
. "$VENV_DIR/bin/activate"

## Install Diffusers package


In [None]:
pip install --requirement requirements.txt

## 3. Model Training

### Training Command

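Execute the training script with the following parameters. First, download the two conditioning images that the training command passes to `--validation_image`: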

In [None]:
# Fetch the two conditioning images into the current directory.
base_url="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training"
for idx in 1 2; do
  wget "${base_url}/conditioning_image_${idx}.png"
done

### 12GB VRAM GPU Configuration

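Add the following flags to the training command: `--gradient_checkpointing`, `--use_8bit_adam`, `--enable_xformers_memory_efficient_attention`, and `--set_grads_to_none` (the `train_sd15.sh` script below already includes them). Before training, download the model and dataset once so the job can run without internet access: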

In [None]:
# ONE-TIME SETUP: download the model and dataset to a local directory,
# in case the training machine has no internet access.
# Run every command in this cell on a machine that HAS internet access.
# Install the `hf` command-line tool used below.
# NOTE(review): this pipes a remote installer script straight into bash;
# inspect the script first if that is a concern.
curl -LsSf https://hf.co/cli/install.sh | bash

# Download the Stable Diffusion v1.5 repository; this creates a local
# directory (shared/models/sd15) with all required files.
hf download stable-diffusion-v1-5/stable-diffusion-v1-5 \
  --local-dir shared/models/sd15 

# Download the fill50k dataset repository into shared/datasets/fill50k
# (also on a machine with internet access).
hf download fusing/fill50k \
  --repo-type dataset \
  --local-dir shared/datasets/fill50k

# After downloading the dataset, extract the zip file and copy train.jsonl to metadata.jsonl.
# Presumably the extracted files belong in shared/datasets/fill50k/extracted/,
# which is the directory train_sd15.sh uses as DATASET_DIR - confirm.


## Bash command - train_sd15.sh

In [None]:
#!/bin/bash
# Slurm batch script: trains a ControlNet on the locally downloaded fill50k
# dataset, using the local Stable Diffusion v1.5 checkpoint as the base model.
# Requests one node, one task, one GPU, 8 CPUs and a 1-hour time limit.
#
# NOTE: Slurm does not create missing directories for --output/--error.
# Run `mkdir -p logs` in the submission directory before `sbatch train_sd15.sh`,
# otherwise the job cannot write its log files.
#SBATCH --job-name=controlnet_fill50k
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --time=01:00:00
#SBATCH --output=logs/controlnet_train_%j.log
#SBATCH --error=logs/controlnet_train_%j.err

# Stop at the first failing command, unset variable, or failed pipeline stage
# instead of continuing with a half-configured environment.
set -euo pipefail

# Single place to change when the project is moved to another location.
PROJECT_DIR="/home/hpc/rlvl/rlvl165v/Desktop/controlnet"

export MODEL_DIR="$PROJECT_DIR/shared/models/sd15"
export DATASET_DIR="$PROJECT_DIR/shared/datasets/fill50k/extracted/"
export OUTPUT_DIR="$PROJECT_DIR/output/"

# Keep all Hugging Face caches under one directory.
# TRANSFORMERS_CACHE is deprecated in recent transformers releases in favour
# of HF_HOME; it is kept here for older versions.
export HF_HOME="/home/woody/rlvl/rlvl165v/.cache/huggingface"
export HF_DATASETS_CACHE="$HF_HOME/datasets"
export TRANSFORMERS_CACHE="$HF_HOME/transformers"
export HF_HUB_CACHE="$HF_HOME/hub"

# Force offline mode: use only local files, never contact the Hub.
export HF_HUB_OFFLINE=1
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1

# All path variables are quoted so a path containing spaces stays one argument.
# The validation images are resolved relative to the directory the job runs in.
# The last four flags are the memory-saving options for GPUs with limited VRAM.
accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path="$MODEL_DIR" \
  --output_dir="$OUTPUT_DIR" \
  --train_data_dir="$DATASET_DIR" \
  --image_column="image" \
  --conditioning_image_column="conditioning" \
  --caption_column="text" \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --max_train_steps=50 \
  --checkpointing_steps=10 \
  --validation_steps=10 \
  --gradient_checkpointing \
  --use_8bit_adam \
  --enable_xformers_memory_efficient_attention \
  --set_grads_to_none


## Training with multiple GPUs

`accelerate` allows for seamless multi-GPU training. Follow the instructions [here](https://huggingface.co/docs/accelerate/basic_tutorials/launch) for running distributed training with `accelerate`. Here is an example command:

In [None]:
# Example multi-GPU run: pulls the base model and the fill50k dataset from the
# Hugging Face Hub and reports to Weights & Biases.
export MODEL_DIR="stable-diffusion-v1-5/stable-diffusion-v1-5"
# Placeholder: replace with the directory where the trained model should be saved.
export OUTPUT_DIR="path to save model"

# The variables are quoted so that a value containing spaces (such as the
# placeholder above) is passed as a single argument instead of being split.
# --mixed_precision is given both to `accelerate launch` and to the script.
accelerate launch --mixed_precision="fp16" --multi_gpu train_controlnet.py \
  --pretrained_model_name_or_path="$MODEL_DIR" \
  --output_dir="$OUTPUT_DIR" \
  --dataset_name=fusing/fill50k \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
  --train_batch_size=4 \
  --mixed_precision="fp16" \
  --tracker_project_name="controlnet-test" \
  --report_to=wandb

## Test with ADE20K dataset 

Compute the condition-reconstruction and FID metrics reported in the paper, using the ADE20K dataset. The cell below downloads the dataset.

In [None]:
#install the dataset tools package to download the dataset
pip install --upgrade dataset-tools


import dataset_tools as dtools

dtools.download(dataset='ADE20K', dst_dir='~/dataset-ninja/')

## Performing inference with the trained ControlNet

The trained model can be run the same as the original ControlNet pipeline with the newly trained ControlNet. Set `base_model_path` and `controlnet_path` to the values `--pretrained_model_name_or_path` and `--output_dir` were respectively set to in the training script.

In [None]:
# Inference with the trained ControlNet: load the local base model and the
# trained ControlNet weights, generate one image conditioned on
# conditioning_image_1.png, and save it to ./output.png.
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
from diffusers.utils.import_utils import is_xformers_available
import torch

base_model_path = "/home/hpc/rlvl/rlvl165v/Desktop/controlnet/shared/models/sd15"
# Point directly to the output directory which contains config.json and safetensors file
controlnet_path = "/home/hpc/rlvl/rlvl165v/Desktop/controlnet/output/"

# Load with local_files_only=True to avoid trying to connect to huggingface.co
controlnet = ControlNetModel.from_pretrained(
    controlnet_path,
    torch_dtype=torch.float16,
    local_files_only=True,
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    local_files_only=True,
)

# Speed up the diffusion process with a faster scheduler.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# xformers attention is optional, so enable it only when the package is
# installed instead of failing on machines without it.
if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
# Memory optimization.
pipe.enable_model_cpu_offload()

control_image = load_image("./conditioning_image_1.png")
prompt = "pale golden rod circle with old lace background"

# Generate the image with a fixed seed so the result is reproducible.
generator = torch.manual_seed(0)
image = pipe(
    prompt, num_inference_steps=20, generator=generator, image=control_image
).images[0]
image.save("./output.png")