In [1]:
from transformers import CLIPProcessor, CLIPModel
import glob
from PIL import Image
import torch

In [9]:
# Cache of (model, processor) pairs keyed by checkpoint name, so that scoring
# several image sets in one session loads the weights only once.
_CLIP_CACHE = {}


def _load_clip(model_name):
    """Return the (CLIPModel, CLIPProcessor) pair for `model_name`, loading it on first use."""
    if model_name not in _CLIP_CACHE:
        _CLIP_CACHE[model_name] = (
            CLIPModel.from_pretrained(model_name),
            CLIPProcessor.from_pretrained(model_name),
        )
    return _CLIP_CACHE[model_name]


def calc_clip_score(texts, images, model_name="openai/clip-vit-base-patch32"):
    """Compute the cosine similarity between CLIP text and image embeddings.

    Args:
        texts: Prompt string(s) handed to the CLIP processor as `text`.
        images: Image(s) handed to the CLIP processor as `images`.
        model_name: Hugging Face checkpoint to load. Defaults to the
            checkpoint this notebook has always used.

    Returns:
        A NumPy array of cosine similarities between `image_embeds` and
        `text_embeds`, one value per (text, image) pair.

    Raises:
        ValueError: If `texts` and `images` are both lists/tuples and either
            is empty, or their lengths differ and neither has length 1
            (the only case `cosine_similarity` can broadcast).
    """
    # Fail fast on batch sizes that could never be paired, before paying for
    # the model load and forward pass. Non-sequence inputs (a single string or
    # a single image) are passed through to the processor unchanged.
    if isinstance(texts, (list, tuple)) and isinstance(images, (list, tuple)):
        n_texts, n_images = len(texts), len(images)
        if n_texts == 0 or n_images == 0:
            raise ValueError("texts and images must not be empty")
        if n_texts != n_images and 1 not in (n_texts, n_images):
            raise ValueError(
                f"cannot pair {n_texts} texts with {n_images} images"
            )

    # Load (or reuse) the CLIP model and processor.
    model, processor = _load_clip(model_name)

    # Preprocess the inputs. truncation=True keeps over-long prompts within
    # the text encoder's maximum sequence length instead of crashing.
    inputs = processor(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Extract the embeddings without tracking gradients.
    with torch.no_grad():
        outputs = model(**inputs)
        image_features = outputs.image_embeds
        text_features = outputs.text_embeds

        # Cosine similarity between each image embedding and its text embedding.
        cosine_similarity = torch.nn.functional.cosine_similarity(image_features, text_features)
        return cosine_similarity.cpu().numpy()


def compute_clip_scores(target_directory):
    """Print the mean CLIP score of one set of generated images.

    Prompts are read from `small_coco/*.txt` and images from
    `generated/{target_directory}/*.png`. Both file lists are sorted, and the
    i-th prompt is scored against the i-th image.

    Args:
        target_directory: Sub-directory of `generated/` holding the PNG files.

    Raises:
        FileNotFoundError: If no prompt files or no image files are found.
        ValueError: If the number of prompts differs from the number of images.
    """
    prompt_files = sorted(glob.glob("small_coco/*.txt"))
    fake_files = sorted(glob.glob(f"generated/{target_directory}/*.png"))

    # Validate before any loading: a count mismatch would otherwise pair
    # prompts with the wrong images or fail deep inside the model call.
    if not prompt_files:
        raise FileNotFoundError("no prompt files match small_coco/*.txt")
    if not fake_files:
        raise FileNotFoundError(
            f"no image files match generated/{target_directory}/*.png"
        )
    if len(prompt_files) != len(fake_files):
        raise ValueError(
            f"found {len(prompt_files)} prompts but {len(fake_files)} images "
            f"in generated/{target_directory}"
        )

    # Read each prompt with a context manager so the file handle is closed.
    prompt_lists = []
    for prompt_file in prompt_files:
        with open(prompt_file) as handle:
            prompt_lists.append(handle.read())

    # convert() returns a fully loaded copy, so the underlying file can be
    # closed as soon as the `with` block exits.
    fake_imgs = []
    for file in fake_files:
        with Image.open(file) as img:
            fake_imgs.append(img.convert("RGB"))

    sim = calc_clip_score(prompt_lists, fake_imgs)
    print(target_directory, sim.mean())

In [10]:
# Score each generated image set in turn; every call prints one line.
for model_dir in ("sd15", "sd21", "sdxl"):
    compute_clip_scores(model_dir)

sd15 0.31849223
sd21 0.31904924
sdxl 0.32087463


## または torchmetrics の CLIPScore を使う以下の方法でもOK(スコアは上記の100倍のスケールで出力される)

In [5]:
!pip install torchmetrics

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting torchmetrics
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl.metadata (19 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl.metadata (4.7 kB)
Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.2 torchmetrics-1.4.0.post0
[0m

In [8]:
from torchmetrics.multimodal.clip_score import CLIPScore
import glob
from PIL import Image
import torch
import numpy as np

def compute_clip_scores_torchmetrics(target_directory):
    """Print the CLIP score of one set of generated images using torchmetrics.

    Prompts are read from `small_coco/*.txt` and images from
    `generated/{target_directory}/*.png`. Both file lists are sorted, and the
    i-th prompt is scored against the i-th image.

    Args:
        target_directory: Sub-directory of `generated/` holding the PNG files.

    Raises:
        FileNotFoundError: If no prompt files or no image files are found.
        ValueError: If the number of prompts differs from the number of images.
    """
    prompt_files = sorted(glob.glob("small_coco/*.txt"))
    fake_files = sorted(glob.glob(f"generated/{target_directory}/*.png"))

    # Validate before any loading so a mismatch is reported clearly instead of
    # surfacing as an error inside the metric.
    if not prompt_files:
        raise FileNotFoundError("no prompt files match small_coco/*.txt")
    if not fake_files:
        raise FileNotFoundError(
            f"no image files match generated/{target_directory}/*.png"
        )
    if len(prompt_files) != len(fake_files):
        raise ValueError(
            f"found {len(prompt_files)} prompts but {len(fake_files)} images "
            f"in generated/{target_directory}"
        )

    # Read each prompt with a context manager so the file handle is closed.
    prompt_lists = []
    for prompt_file in prompt_files:
        with open(prompt_file) as handle:
            prompt_lists.append(handle.read())

    # Force three channels before resizing: an RGBA or palette PNG would
    # otherwise produce an array that is not (H, W, 3) and break the permute.
    image_tensors = []
    for file in fake_files:
        with Image.open(file) as img:
            resized = img.convert("RGB").resize((224, 224))
        image_tensors.append(torch.from_numpy(np.array(resized)))

    # Stack to (N, H, W, C), then reorder to channels-first (N, C, H, W).
    fake_imgs = torch.stack(image_tensors).permute(0, 3, 1, 2)

    metric = CLIPScore(model_name_or_path="openai/clip-vit-base-patch32")
    score = metric(fake_imgs, prompt_lists).cpu().detach().numpy()
    print(target_directory, score)

# Score each generated image set in turn; every call prints one line.
for model_dir in ("sd15", "sd21", "sdxl"):
    compute_clip_scores_torchmetrics(model_dir)

sd15 31.849224
sd21 31.904922
sdxl 32.08746
