In [None]:
# Reference guide this notebook follows:
# https://huggingface.co/docs/diffusers/main/en/conceptual/evaluation
# Print the metadata pip reports for the installed `diffusers` package.
!pip show diffusers

In [None]:
from huggingface_hub import notebook_login
# Authenticate with the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required to download the checkpoints used below — confirm.
notebook_login()

# CLIP Score

CLIP score measures the compatibility of image-caption pairs. Higher CLIP scores imply higher compatibility 🔼. The CLIP score is a quantitative measurement of the qualitative concept “compatibility”. Image-caption pair compatibility can also be thought of as the semantic similarity between the image and the caption. CLIP score was found to have high correlation with human judgement.

In [None]:
# calculate clip score
from functools import partial

import torch
from torchmetrics.functional.multimodal import clip_score

# Bind the CLIP checkpoint once so every call scores with the same model.
clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")


def calculate_clip_score(images, prompts):
    """Return the CLIP score of ``images`` against ``prompts``, rounded to 4 decimals.

    Args:
        images: array-like supporting ``images * 255`` and ``.astype`` whose result
            ``torch.from_numpy`` accepts; it is permuted from (N, H, W, C) to
            (N, C, H, W). Assumed to hold floats in [0, 1] — TODO confirm against
            the pipeline output.
        prompts: captions forwarded unchanged to ``clip_score_fn``.

    Returns:
        float: the score rounded to four decimal places.
    """
    images_int = (images * 255).astype("uint8")
    images_nchw = torch.from_numpy(images_int).permute(0, 3, 1, 2)
    # Named `score` (not `clip_score`) so the imported metric function is not shadowed.
    score = clip_score_fn(images_nchw, prompts).detach()
    return round(float(score), 4)

In [None]:
from diffusers import StableDiffusionPipeline
import torch

# Fine-tuned DreamBooth checkpoint, loaded in half precision and moved to the GPU.
model_ckpt = "RadwaH/DreamBoothAgnes2"
dream_pipeline = StableDiffusionPipeline.from_pretrained(model_ckpt, torch_dtype=torch.float16).to("cuda")
prompts = ["close up portrait of sks girl as Christmas elf ,in a film still of jim henson's labyrinth, with christmas elves, full face details, cinematic lighting, hyper realistic facial features, ultra detailed, canon eos 5d, 100mm f/1.8, ISO100"]
# A fixed seed makes the generated image, and any score computed from it, reproducible.
# "np" is the current spelling of the NumPy output type.
dreambooth_images = dream_pipeline(
    prompts,
    num_inference_steps=250,
    guidance_scale=7.5,
    num_images_per_prompt=1,
    generator=torch.manual_seed(0),
    output_type="np",
).images

In [None]:
# Baseline Stable Diffusion v1.5 checkpoint, loaded in half precision on the GPU.
model_ckpt_1_5 = "runwayml/stable-diffusion-v1-5"
sd_pipeline_1_5 = StableDiffusionPipeline.from_pretrained(model_ckpt_1_5, torch_dtype=torch.float16).to("cuda")
# `prompts` is the list defined in an earlier cell.
# A fixed seed makes the generated image, and any score computed from it, reproducible.
images_1_5 = sd_pipeline_1_5(
    prompts,
    num_images_per_prompt=1,
    generator=torch.manual_seed(0),
    output_type="np",
).images

In [None]:
# Print the metadata pip reports for the installed `diffusers` package.
# NOTE(review): duplicates the check in the first cell — confirm whether both are needed.
!pip show diffusers

In [None]:
import torch
from diffusers import DiffusionPipeline

model_cd = "RadwaH/CustomDiffusionAgnes2"
pipe = DiffusionPipeline.from_pretrained(model_cd, torch_dtype=torch.float16).to("cuda")
# NOTE(review): "path-to-save-model" looks like a placeholder — point both calls at the
# directory that actually holds the Custom Diffusion weights before running.
pipe.unet.load_attn_procs("path-to-save-model", weight_name="pytorch_custom_diffusion_weights.bin")
pipe.load_textual_inversion("path-to-save-model", weight_name="<new1>.bin")

# `prompts` is the list defined in an earlier cell.
# Request NumPy output: calculate_clip_score multiplies the images by 255 and calls
# .astype on them, which the default list of PIL images does not support.
# A fixed seed makes the generated image, and any score computed from it, reproducible.
cd_image = pipe(
    prompts,
    num_inference_steps=100,
    guidance_scale=6.0,
    eta=1.0,
    generator=torch.manual_seed(0),
    output_type="np",
).images

In [None]:
# Score each model's output against the same prompts.
dreambooth_clip_score = calculate_clip_score(dreambooth_images, prompts)
print(f"Dreambooth CLIP score: {dreambooth_clip_score}")

sd_clip_score_1_5 = calculate_clip_score(images_1_5, prompts)
print(f"Stable Diffusion v-1-5 CLIP Score: {sd_clip_score_1_5}")

# Custom Diffusion gets its own variable and label so it neither overwrites nor
# masquerades as the Stable Diffusion v1.5 score.
cd_clip_score = calculate_clip_score(cd_image, prompts)
print(f"Custom Diffusion CLIP Score: {cd_clip_score}")

# Class-Conditioned Image Generation

Class-conditioned generative models are usually pre-trained on a class-labeled dataset such as ImageNet-1k.
Fréchet Inception Distance is a measure of similarity between two datasets of images. It was shown to correlate well with the human judgment of visual quality and is most often used to evaluate the quality of samples of Generative Adversarial Networks. FID is calculated by computing the Fréchet distance between two Gaussians fitted to feature representations of the Inception network.
These two datasets are essentially the dataset of real images and the dataset of fake images (generated images in our case). FID is usually calculated with two large datasets. However, for this document, we will work with two mini datasets.

In [None]:
from zipfile import ZipFile
import requests


def download(url, local_filepath, timeout=60):
    """Download ``url`` and write the response body to ``local_filepath``.

    Args:
        url: address fetched with ``requests.get``.
        local_filepath: destination path, opened in binary write mode.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        ``local_filepath``, unchanged, for convenient chaining.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTTP error page under the target file name.
    r.raise_for_status()
    with open(local_filepath, "wb") as f:
        f.write(r.content)
    return local_filepath


# Fetch the sample archive into the working directory, named after the last URL segment.
dummy_dataset_url = "https://hf.co/datasets/sayakpaul/sample-datasets/resolve/main/sample-imagenet-images.zip"
local_filepath = download(dummy_dataset_url, dummy_dataset_url.rsplit("/", 1)[-1])

# Unpack the archive into the current directory.
with ZipFile(local_filepath, mode="r") as zipper:
    zipper.extractall(path=".")
    
from PIL import Image
import numpy as np  # needed for np.array below; otherwise only imported in a later cell
import os

dataset_path = "sample-imagenet-images"
# Sorted so the image order is deterministic across runs.
image_paths = sorted([os.path.join(dataset_path, x) for x in os.listdir(dataset_path)])

# Load every file as an RGB array.
real_images_ = [np.array(Image.open(path).convert("RGB")) for path in image_paths]
# apply some lightweight pre-processing on them to use them for FID calculation
from torchvision.transforms import functional as F


def preprocess_image(image):
    """Turn one image array into a scaled, center-cropped batch of size one.

    The input gains a leading batch axis, its axes are reordered with
    ``permute(0, 3, 1, 2)``, the values are divided by 255.0, and the result is
    center-cropped to 256x256.
    """
    batched = torch.tensor(image)[None]
    scaled = batched.permute(0, 3, 1, 2) / 255.0
    cropped = F.center_crop(scaled, (256, 256))
    return cropped


# Concatenate the per-image batches returned by preprocess_image into one tensor.
real_images_ = torch.cat([preprocess_image(image) for image in real_images_])
# Show only the shape; printing the whole tensor floods the cell output.
print(real_images_.shape)

In [None]:
from PIL import Image
import numpy as np
import os

dataset_path = "./examples/dreambooth/girl_sks/"
# Sorted so the image order is deterministic across runs.
image_paths = sorted([os.path.join(dataset_path, x) for x in os.listdir(dataset_path)])

# Load every file as an RGB array.
real_images = [np.array(Image.open(path).convert("RGB")) for path in image_paths]
# Summarize instead of dumping every pixel array into the cell output.
print(f"Loaded {len(real_images)} images with shapes {[image.shape for image in real_images]}")

In [None]:
# apply some lightweight pre-processing on them to use them for FID calculation
from torchvision.transforms import functional as F


def preprocess_image(image):
    """Convert one image array into a scaled, center-cropped batch of size one.

    Args:
        image: array accepted by ``torch.tensor``; after a batch axis is added it is
            permuted with ``(0, 3, 1, 2)``, so a 3-D (H, W, C) layout is assumed —
            TODO confirm against the loader.

    Returns:
        The tensor divided by 255.0 and center-cropped to 256x256, with a leading
        batch dimension of 1.
    """
    image = torch.tensor(image).unsqueeze(0)
    image = image.permute(0, 3, 1, 2) / 255.0
    # Return the cropped tensor: a bare `return` yields None, which breaks the
    # torch.cat call that consumes these results.
    return F.center_crop(image, (256, 256))


# Run every loaded image through preprocess_image and join the results into one tensor.
real_images = torch.cat(list(map(preprocess_image, real_images)))
# torch.Size([10, 3, 256, 256])

In [None]:
# Generate images from the fine-tuned DreamBooth model
from diffusers import StableDiffusionPipeline
import torch

model_ckpt = "RadwaH/DreamBoothAgnes2"
dream_pipeline = StableDiffusionPipeline.from_pretrained(model_ckpt, torch_dtype=torch.float16).to("cuda")
prompts = ["a hopeful pretty sks girl, HD"]
# A fixed seed makes the generated batch, and any metric computed from it, reproducible.
# "np" is the current spelling of the NumPy output type.
dreambooth_images = dream_pipeline(
    prompts,
    num_inference_steps=250,
    guidance_scale=7.5,
    num_images_per_prompt=10,
    generator=torch.manual_seed(0),
    output_type="np",
).images

# Convert the NumPy batch to a tensor and move the last axis to position 1.
dreambooth_images = torch.tensor(dreambooth_images)
dreambooth_images = dreambooth_images.permute(0, 3, 1, 2)
print(dreambooth_images.shape)


In [None]:
import torch
from torchmetrics.image.fid import FrechetInceptionDistance

In [None]:
# normalize=True tells the metric that the inputs are float images in [0, 1]
# (real_images is divided by 255 during preprocessing); the default expects uint8.
fid = FrechetInceptionDistance(feature=64, normalize=True)
fid.update(real_images, real=True)
# The generated batch is stored in `dreambooth_images`; `fake_images` is never defined.
fid.update(dreambooth_images, real=False)

print(f"FID: {float(fid.compute())}")
# FID: 177.7147216796875  (value recorded from an earlier run; expect a different number)

In [None]:
!pip install torch-fidelity