 ## Install the Required Packages

In [None]:
!pip install git+https://github.com/huggingface/diffusers.git
!pip install diffusers==0.4.1
!pip install accelerate
!pip install torchvision
!pip install transformers>=4.21.0
!pip install ftfy
!pip install tensorboard
!pip install modelcards
!pip install datasets
!pip install -U diffusers

Collecting git+https://github.com/huggingface/diffusers.git
  Cloning https://github.com/huggingface/diffusers.git to /state/partition1/job-27670277/pip-req-build-qysr29kz
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers.git /state/partition1/job-27670277/pip-req-build-qysr29kz
  Resolved https://github.com/huggingface/diffusers.git to commit 9a52e33eb65bc580e3257ce504372a67b16fb8df
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: diffusers
  Building wheel for diffusers (pyproject.toml) ... [?25ldone
[?25h  Created wheel for diffusers: filename=diffusers-0.10.0.dev0-py3-none-any.whl size=468367 sha256=3de55de6c676f3a9882e643e1fd4c30ccc6fb846da4b51133e503d788d4321fa
  Stored in directory: /state/partition1/job-27670277/pip-ephem-wheel-cache-mlkzh3f7/wheels/02/d2/24/9d4a75289c359ffceca

## Import the Required Packages

In [None]:
import datasets
import shutil
import h5py
import numpy as np
import io
import PIL.Image as Image
from torchvision.utils import save_image
import torch
import json
from datasets import load_dataset
import pickle
from diffusers import StableDiffusionPipeline

## Check GPU Information

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)

Fri Dec  9 13:31:02 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:CA:00.0 Off |                    0 |
| N/A   27C    P0    70W / 500W |   6058MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Initialize the accelerate environment



In [None]:
# Create accelerate's default configuration file. Per the saved output below,
# an already existing configuration is left untouched.
!accelerate config default

Configuration already exists at /home/sv2128/.cache/huggingface/accelerate/default_config.yaml, will not override. Run `accelerate config` manually or pass a different `save_location`.


## Hugging Face Hub Login

You need to accept the model license before downloading or using the weights. In this example we’ll use model version v1-4, so you’ll need to visit its card (https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree.

You have to be a registered user in 🤗 Hugging Face Hub, and you’ll also need to use an access token for the code to work. For more information on access tokens, please refer to this section of the documentation (https://huggingface.co/docs/hub/security-tokens).

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode an access token in a notebook (it leaks when the file is shared).
# Read it from the HF_TOKEN environment variable, or prompt for it without echoing.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid.
Your token has been saved to /home/sv2128/.huggingface/token
Login successful


## Function to Unpickle

We need to use this method to unpickle Cifar-10 dataset batches.

In [None]:
def unpickle(file):
  """Load and return the object stored in the pickle file at path `file`.

  The file is opened in binary mode and decoded with ``encoding='latin1'``.

  NOTE: unpickling can execute arbitrary code; only call this on trusted files.
  """
  with open(file, 'rb') as batch_file:
    return pickle.load(batch_file, encoding='latin1')

## Function to Construct Cifar-10 Dataset as Required by Stable Diffusion

To run on your own training files you need to prepare the dataset according to the format required by 🤗 `datasets`. You can upload your dataset to the Hugging Face Hub, or you can prepare a local folder with your files. The `datasets` ImageFolder documentation (https://huggingface.co/docs/datasets/image_load#imagefolder) explains how to do it.

Make sure there is an empty train folder in CIFAR_DIR

Keep your train data batches of CIFAR-10 in CIFAR_DIR

In [None]:
"""

This function uses only the training batches of the CIFAR-10 dataset and converts them into the format
required for fine-tuning the Stable Diffusion model.

"""

def construct_cifar(CIFAR_DIR):
  """Convert the CIFAR-10 training batches into an image folder with captions.

  Reads ``data_batch_1`` .. ``data_batch_5`` from ``CIFAR_DIR``, writes every
  image to ``CIFAR_DIR/train/<filename>`` and writes
  ``CIFAR_DIR/train/metadata.jsonl`` with one JSON object per image holding
  its ``file_name`` and its class name as ``text``.

  Args:
    CIFAR_DIR: directory containing the pickled CIFAR-10 training batches.
  """
  import os  # local import so this cell does not depend on the import cell

  # CIFAR-10 class names, indexed by the integer label stored in each batch.
  label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                 'dog', 'frog', 'horse', 'ship', 'truck']

  # Create the output folder up front instead of failing on the first save.
  train_dir = os.path.join(CIFAR_DIR, 'train')
  os.makedirs(train_dir, exist_ok=True)

  metajson_list = []
  num_files = 5
  for i in range(1, num_files + 1):
    print(f"Processing data_batch_{i} ({i}/{num_files})")
    data_batch = unpickle(os.path.join(CIFAR_DIR, 'data_batch_' + str(i)))

    records = zip(data_batch['data'], data_batch['filenames'], data_batch['labels'])
    for pixels, file_name, label in records:
      # Each row is reshaped to (3, 32, 32) and divided by 255 before saving.
      image = torch.from_numpy(pixels.reshape(3, 32, 32)) / 255
      save_image(image, os.path.join(train_dir, str(file_name)))
      metajson_list.append({'file_name': file_name, 'text': label_names[label]})

  with open(os.path.join(train_dir, 'metadata.jsonl'), 'w') as f:
    for item in metajson_list:
      f.write(json.dumps(item) + "\n")

  # Print a summary instead of dumping every metadata record into the output.
  print(f"Wrote {len(metajson_list)} images and metadata.jsonl to {train_dir}")


In [None]:
# Directory that holds the CIFAR-10 training batches; replace this placeholder with your own path.
CIFAR_DIR = "path/to/CIFAR_DIR"

In [None]:
# Convert the CIFAR-10 training batches under CIFAR_DIR into image files plus metadata.jsonl.
construct_cifar(CIFAR_DIR)
# Optional sanity check (left disabled): load the result with the `datasets`
# "imagefolder" builder and print the first example.
#cifar_dataset = datasets.load_dataset("imagefolder", data_dir=CIFAR_DIR, split="train")
#print(cifar_dataset[0])

## Fine-tune Stable Diffusion

Fine-tuning Stable Diffusion takes around 8 hours on a V100 GPU for the configured number of epochs. Please tune the hyperparameters to suit your requirements.

In [None]:
!(accelerate launch HF_StableD_CV/train_text_to_image.py \
  --pretrained_model_name_or_path="CompVis/stable-diffusion-v1-4" \
  --train_data_dir="path/to/CIFAR_DIR/train" \
  --use_ema \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=16 \
  --gradient_accumulation_steps=8 \
  --gradient_checkpointing \
  --mixed_precision="bf16" \
  --num_train_epochs=5 \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --lr_scheduler="constant" --lr_warmup_steps=0 \
  --output_dir="path/to/CIFAR_DIR/output/directory" \
)

## Visualize Epoch Vs Loss Graph 

In [None]:
!tensorboard --logdir path/to/CIFAR_DIR/output/directory

## Function to Generate Cifar-10 Dataset

This function generates images using the CIFAR-10 class labels (as present in the test set) as text prompts.

Make sure there is an empty test folder in CIFAR_DIR location

In [None]:
def generate_cifar(CIFAR_DIR, images_per_class=1000):
  """Generate images for every CIFAR-10 class with the fine-tuned pipeline.

  Loads the pipeline from ``CIFAR_DIR/output/directory``, uses each class name
  as the text prompt, and saves the results to
  ``CIFAR_DIR/test/<label>_<index>.png``.

  Args:
    CIFAR_DIR: directory that contains the fine-tuned model under
      ``output/directory``; generated images go into its ``test`` subfolder.
    images_per_class: number of images to generate for each class name.
  """
  import os  # local import so this cell does not depend on the import cell

  label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                 'dog', 'frog', 'horse', 'ship', 'truck']

  # Derive both paths from the CIFAR_DIR argument (previously ignored).
  model_path = os.path.join(CIFAR_DIR, 'output', 'directory')
  test_dir = os.path.join(CIFAR_DIR, 'test')
  os.makedirs(test_dir, exist_ok=True)

  pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
  # Replace the safety checker with a pass-through that returns the images unchanged.
  pipe.safety_checker = lambda images, clip_input: (images, False)
  pipe.to("cuda")

  # Cover all classes; the previous loop started at index 2 and skipped
  # 'airplane' and 'automobile'.
  for label in label_names:
    for i in range(images_per_class):
      image = pipe(prompt=label).images[0]
      # A unique name per image; previously every save overwrote the same file.
      image.save(os.path.join(test_dir, f"{label}_{i}.png"))

In [None]:
# Run image generation with the fine-tuned model; CIFAR_DIR is passed through to generate_cifar.
generate_cifar(CIFAR_DIR)