In [None]:
# Install the notebook's third-party dependencies with %pip so they land in
# the environment of the running kernel.
# NOTE(review): only torch has a lower bound; nothing else is version-pinned.
%pip install --quiet "torch>=2.0" "torchvision" Pillow matplotlib
# CLIP is installed straight from the GitHub repository; the URL names no
# tag or commit, so whatever the default branch currently holds is used.
%pip install --quiet git+https://github.com/openai/CLIP.git
%pip install --quiet lpips

In [None]:
import os
import json
from pathlib import Path

# Working directory of the kernel; the image folder is derived from it.
ROOT = Path.cwd()
IMG_DIR = ROOT.joinpath("annotation_images")

def load_json(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed object."""
    with open(path, mode="r", encoding="utf-8") as handle:
        parsed = json.loads(handle.read())
    return parsed

# Load both annotation mappings. The file names are relative, so they are
# looked up in the current working directory. The two loads are kept as
# separate statements so that `mapping_main` stays bound even if the second
# file cannot be read.
mapping_main = load_json("mapping_file.json")
mapping_ti2i = load_json("mapping_file_ti2i_benchmark.json")

# Quick sanity check: number of top-level entries in each mapping.
print("main samples:", len(mapping_main))
print("ti2i samples:", len(mapping_ti2i))

In [None]:
def show_some(mapping, n=5, title="main"):
    print(f"=== {title} ===")
    for i, (k, v) in enumerate(mapping.items()):
        print("ID:", k)
        print(" image_path:", resolve_image_path(v["image_path"]))
        print(" original_prompt:", v.get("original_prompt", ""))
        print(" editing_prompt:", v.get("editing_prompt", ""))
        print(" instruction:", v.get("editing_instruction", ""))
        print(" editing_type_id:", v.get("editing_type_id", ""))
        print()
        if i + 1 >= n:
            break

show_some(mapping_main, title="PIE-Bench main")
show_some(mapping_ti2i, title="PIE-Bench ti2i")

In [None]:
from pathlib import Path

ROOT = Path(".")
IMG_ROOT = ROOT / "annotation_images"
CAND_EXT = [".jpg", ".jpeg", ".png", ".webp"]

def resolve_image_path(rel_path: str) -> Path:
    # json: "data/annotation_images/0_random_140/000000000000.jpg"
    parts = Path(rel_path).parts
    if parts[0] == "data":
        parts = parts[1:]
    p = Path(*parts)

    full = ROOT / p
    if full.exists():
        return full

    # если такого файла нет — пробуем подставить другие расширения
    parent = (ROOT / p).parent
    stem = p.stem
    for ext in CAND_EXT:
        cand = parent / f"{stem}{ext}"
        if cand.exists():
            return cand

    # если и так нет — просто возьмём первый файл в папке как fallback
    imgs = sorted(parent.glob("*"))
    if imgs:
        return imgs[0]

    raise FileNotFoundError(f"Не нашёл файл ни по {p}, ни по вариантам расширений.")

# проверка
first_id, first_item = next(iter(mapping_main.items()))
img_path = resolve_image_path(first_item["image_path"])
print("resolved to:", img_path)

from PIL import Image
import matplotlib.pyplot as plt

img = Image.open(img_path).convert("RGB")
plt.imshow(img)
plt.axis("off")
plt.show()

In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def rle_decode(rle: dict) -> np.ndarray:
    """Decode an uncompressed run-length encoding into a binary mask.

    ``rle["counts"]`` is a sequence of run lengths whose values alternate
    0, 1, 0, 1, ... starting with 0; ``rle["size"]`` is ``(h, w)``. The flat
    pixel sequence is laid out in column-major (Fortran) order.

    Runs that extend past ``h * w`` pixels are clipped, and a sequence that
    covers fewer pixels is padded with zeros.

    Args:
        rle: dict with keys "counts" and "size".

    Returns:
        ``uint8`` array of shape ``(h, w)`` containing 0s and 1s.

    Raises:
        TypeError: if "counts" is a string/bytes (compressed RLE is not
            supported) or contains non-integer values.
        ValueError: if any run length is negative.
    """
    raw_counts = rle["counts"]
    if isinstance(raw_counts, (str, bytes)):
        raise TypeError(
            "rle['counts'] is a compressed string; only a list of integer run lengths is supported"
        )
    h, w = rle["size"]
    total = h * w

    counts = np.asarray(list(raw_counts))
    if counts.size and not np.issubdtype(counts.dtype, np.integer):
        raise TypeError("rle['counts'] must contain integers")
    counts = counts.astype(np.int64)
    # A negative run would otherwise be interpreted as a wrap-around slice
    # and silently overwrite unrelated pixels.
    if (counts < 0).any():
        raise ValueError("rle['counts'] must not contain negative run lengths")

    # Run i carries value i % 2; expand every run in a single vectorised call.
    run_values = (np.arange(counts.size) % 2).astype(np.uint8)
    flat = np.repeat(run_values, counts)[:total]

    arr = np.zeros(total, dtype=np.uint8)
    arr[:flat.size] = flat
    return arr.reshape((h, w), order="F")

# Take the first sample of the main mapping and load its source image.
first_id, first_item = next(iter(mapping_main.items()))
img_path = resolve_image_path(first_item["image_path"])
img = Image.open(img_path).convert("RGB")

fig = plt.figure(figsize=(9, 4))

# Left panel: the untouched source image.
ax_src = fig.add_subplot(1, 2, 1)
ax_src.imshow(img)
ax_src.set_title("source")
ax_src.axis("off")

# Right panel (only when the sample carries a mask): the source image with
# the decoded edit mask drawn semi-transparently on top.
mask = first_item.get("mask")
if mask:
    m = rle_decode(mask)
    ax_mask = fig.add_subplot(1, 2, 2)
    ax_mask.imshow(img)
    ax_mask.imshow(m, alpha=0.4, cmap="Reds")
    ax_mask.set_title("edit mask")
    ax_mask.axis("off")

plt.show()

In [None]:
import torch
import clip
import lpips
import torchvision.transforms as T

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# CLIP model plus its matching image preprocessing transform.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
# LPIPS perceptual distance with the 'alex' backbone, moved to the same device.
lpips_loss = lpips.LPIPS(net='alex').to(device)

def load_image_for_clip(path: Path):
    """Load the image at ``path`` as an RGB picture prepared for the CLIP model.

    The picture goes through ``clip_preprocess``, gets a leading batch
    dimension of size 1, and is moved to ``device``.
    """
    rgb = Image.open(path).convert("RGB")
    batch = clip_preprocess(rgb).unsqueeze(0)
    return batch.to(device)

def load_image_for_lpips(path: Path, size=(256,256)):
    """Load the image at ``path`` as a tensor prepared for the LPIPS metric.

    The image is converted to RGB, resized to ``size`` with bicubic
    resampling, turned into a tensor with a leading batch dimension of 1 on
    ``device``, and finally rescaled with ``t * 2 - 1``.
    """
    resized = Image.open(path).convert("RGB").resize(size, Image.BICUBIC)
    to_tensor = T.ToTensor()
    batch = to_tensor(resized).unsqueeze(0).to(device)
    # ToTensor yields values in [0, 1]; shift them to [-1, 1].
    return batch * 2 - 1

def clip_sim(img_tensor, text: str):
    """Return the CLIP similarity between a preprocessed image and ``text``.

    Both embeddings are scaled to unit length along the last dimension, then
    their element-wise product is summed and returned as a Python float.
    Gradients are not tracked.
    """
    def _unit(vec):
        # Scale to unit L2 norm along the feature dimension.
        return vec / vec.norm(dim=-1, keepdim=True)

    with torch.no_grad():
        text_tokens = clip.tokenize([text]).to(device)
        image_emb = _unit(clip_model.encode_image(img_tensor))
        text_emb = _unit(clip_model.encode_text(text_tokens))
        similarity = (image_emb * text_emb).sum()
        return float(similarity)

def evaluate_sample(sample, edited_path=None):
    """Compute CLIP and LPIPS metrics for one benchmark sample.

    Args:
        sample: mapping entry; must contain "image_path" and may contain
            "original_prompt" and "editing_prompt".
        edited_path: optional path to the edited/generated image. When it is
            omitted the function behaves as a smoke test and measures LPIPS
            between the source image and itself.

    Returns:
        dict with
          * "clip(src, original_prompt)" and "clip(src, editing_prompt)":
            CLIP similarity of the source image to each prompt, or None when
            the prompt is missing/empty;
          * without ``edited_path``: "lpips(src, src)";
          * with ``edited_path``: "clip(edited, editing_prompt)" (None when the
            editing prompt is missing/empty) and "lpips(src, edited)".
    """
    src_path = resolve_image_path(sample["image_path"])
    orig_text = sample.get("original_prompt", "")
    edit_text = sample.get("editing_prompt", "")

    img_clip = load_image_for_clip(src_path)
    metrics = {
        "clip(src, original_prompt)": clip_sim(img_clip, orig_text) if orig_text else None,
        "clip(src, editing_prompt)": clip_sim(img_clip, edit_text) if edit_text else None,
    }

    img_lp = load_image_for_lpips(src_path)

    if edited_path is None:
        # No edited image supplied: compare the source with itself, which
        # only verifies that the LPIPS pipeline runs.
        with torch.no_grad():
            metrics["lpips(src, src)"] = float(lpips_loss(img_lp, img_lp).item())
        return metrics

    # Real evaluation: how well the edited image matches the editing prompt,
    # and how far it moved perceptually from the source. Both LPIPS inputs
    # are resized to the same default size by load_image_for_lpips.
    edited_clip = load_image_for_clip(edited_path)
    edited_lp = load_image_for_lpips(edited_path)
    metrics["clip(edited, editing_prompt)"] = (
        clip_sim(edited_clip, edit_text) if edit_text else None
    )
    with torch.no_grad():
        metrics["lpips(src, edited)"] = float(lpips_loss(img_lp, edited_lp).item())
    return metrics

# Smoke test: compute the metrics for `first_item` (the first sample of the
# main mapping selected above) and print the resulting dict.
metrics_example = evaluate_sample(first_item)
print(metrics_example)