In [None]:
#@title Installazione delle librerie

!pip install --quiet pillow
!pip install --quiet --upgrade torchmetrics==0.11.4

In [None]:
#@title Import delle librerie
import torch, pandas as pd
from pathlib import Path
from PIL import Image
from torchvision import transforms
from torchmetrics.functional.multimodal import clip_score
import torchmetrics, importlib, sys

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

# PIL image -> float tensor scaled to [0,1], channel-first layout (C,H,W).
to_tensor = transforms.Compose([transforms.ToTensor()])

# Report which torchmetrics release is actually loaded in this kernel.
print(torchmetrics.__version__)


Device: cuda


In [None]:
#@title Questo codice legge un CSV con immagini e descrizioni testuali, e calcola per ciascuna coppia il CLIPScore

from torchmetrics.multimodal.clip_score import CLIPScore
from pathlib import Path
from PIL import Image
import torch, pandas as pd
from torchvision import transforms

# --- settings ---
IMAGES_DIR  = "./images"          # folder that holds the images
CSV_FILE    = "./clip2.csv"        # columns: filename;prompt
CLIP_MODEL  = "openai/clip-vit-base-patch16"
BATCH_SIZE  = 16
device      = "cuda" if torch.cuda.is_available() else "cpu"

# --- read the CSV (sep=";") ---
# header=0 combined with names= drops the file's own header row and applies
# these two column names instead.
df = pd.read_csv(CSV_FILE, sep=";", names=["filename", "prompt"],
                 header=0, engine="python")
print(f"{len(df)} righe lette ✓")

# --- image -> tensor transform ---
# CLIPScore forwards the images to the Hugging Face CLIP processor, which with
# its default settings rescales pixel values by 1/255 itself. Feeding it
# ToTensor() output (floats already in [0,1]) would rescale twice and score
# almost-black images, so the pixels are kept as uint8 in [0,255].
# The fixed 512x512 resize gives every image the same shape so a batch can be
# stacked; note that it does not preserve the aspect ratio.
to_tensor = transforms.Compose([
    transforms.Resize((512, 512), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.PILToTensor(),       # uint8 in [0,255], shape (3,512,512)
])

# --- initialise the metric ---
metric = CLIPScore(model_name_or_path=CLIP_MODEL).to(device)

images_dir = Path(IMAGES_DIR)
all_scores = []

# --- loop over the batches ---
for start in range(0, len(df), BATCH_SIZE):
    batch      = df.iloc[start : start + BATCH_SIZE]
    prompts    = batch["prompt"].tolist()

    # load and convert the images of this batch; the context manager closes
    # each file handle as soon as the pixels have been copied into a tensor
    imgs = []
    for fname in batch["filename"]:
        with Image.open(images_dir / fname) as raw:
            imgs.append(to_tensor(raw.convert("RGB")))   # (3,512,512) uint8

    imgs_tensor = torch.stack(imgs).to(device)    # (B,3,512,512)

    # One metric call per (image, prompt) pair: calling the metric on the whole
    # batch would return a single averaged value instead of per-image scores.
    with torch.no_grad():
        for img, txt in zip(imgs_tensor, prompts):
            s = metric(img.unsqueeze(0), [txt])   # scalar tensor
            all_scores.append(s.item())

# --- add the score column and show the results ---
df["clipscore"] = all_scores
print("Calcolati", len(all_scores), "punteggi. Media:",
      round(df["clipscore"].mean(), 2))

display(df)


21 righe lette ✓
Calcolati 21 punteggi. Media: 21.46


Unnamed: 0,filename,prompt,clipscore
0,andrea2.png,a photo of male person,24.660328
1,andreaNaples.png,"a photo of male person, backgroung Naples, hig...",23.487429
2,anroscElegant.png,"a photo of male person, wearing an elegant sui...",20.437971
3,anrosci.png,a photo of male person,24.572065
4,anrosciLuifienPizza.png,"a photo of two person, background beach, eatin...",20.47613
5,anroscVesuvius.png,"a photo of male person, sitting at table, back...",21.178436
6,camdndrElegant.png,"a photo of female person, sitting at table, ba...",20.912155
7,camdndrVesuvius.png,"a photo of female person, sitting at table, ba...",20.842236
8,lufienAnrosci2.png,"a photo of two person, background beach, eatin...",20.841299
9,luifieng.png,"a photo of male person, eating a pizza, high q...",19.34543


In [None]:
# Render the DataFrame again; its clipscore column is added by the scoring cell.
display(df)

Unnamed: 0,filename,prompt,clipscore
0,1.jpg,"a photo of famale person and male person, toge...",24.490023
1,andrea_cami1.png,"a photo of famale person and male person, toge...",19.967274
2,andrea_cami2.png,"a photo of famale person and male person, toge...",20.061495
3,andrea_cami3.png,"a photo of famale person and male person, toge...",24.241447
4,AndreaVesuvius.png,a photo of person male wearing an elegant suit...,21.010216
5,anrosc_beer.jpg,"a photo of male person , drinking a beer, back...",19.800726
6,anrosc_sunglasses.png,"a photo of person male, wearing a pink hoodie ...",21.511406
7,anrosc_voke.png,"a photo of person male, drinking a coke, weari...",20.175093
8,CamillaVesuvius.png,"a photo of person wearing an elegant suit, sit...",21.021435


In [None]:
# Print the raw per-image scores collected by the scoring loop as a plain Python list.
print(all_scores)

[24.490022659301758, 19.967273712158203, 20.061494827270508, 24.24144744873047, 21.010215759277344, 19.80072593688965, 21.51140594482422, 20.175092697143555, 21.021434783935547]
