<h1>
<hr style=" border:none; height:3px;">
<center>Evaluation pipeline</center>
<hr style=" border:none; height:3px;">
</h1>

<center><img src='https://netacad.centralesupelec.fr/img/cs.jpg' width=200></center>

<h4><center>Louis LHOTTE | Clément VERON | Edouard SEGUIER</center></h4>

In [10]:
# Imports
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import os

# I - Evaluation

In [2]:
# Cache of already-loaded (model, processor) pairs, keyed by checkpoint name,
# so repeated evaluations in the same kernel do not reload the weights.
_CLIP_CACHE = {}


def _load_clip(checkpoint="openai/clip-vit-base-patch32"):
    """Return the (model, processor) pair for `checkpoint`, loading it at most once."""
    if checkpoint not in _CLIP_CACHE:
        _CLIP_CACHE[checkpoint] = (
            CLIPModel.from_pretrained(checkpoint),
            CLIPProcessor.from_pretrained(checkpoint),
        )
    return _CLIP_CACHE[checkpoint]


def evaluate_images_with_clip(image_1_path, image_2_path, prompt):
    """
    Evaluate two images based on their relevance to a given text prompt using the CLIP metric.

    Each score is the cosine similarity between the CLIP embedding of the image
    and the CLIP embedding of the prompt. The model and processor are loaded on
    the first call and reused afterwards.

    Args:
        image_1_path (str): Path to the first image.
        image_2_path (str): Path to the second image.
        prompt (str): The text prompt for evaluation.

    Returns:
        dict: A dictionary with the keys "image_1_score" and "image_2_score",
            each mapped to a Python float.
    """
    model, processor = _load_clip()

    # Image.open keeps the file open; convert() returns an independent copy,
    # so the handle can be released as soon as the conversion is done.
    with Image.open(image_1_path) as raw_1:
        image_1 = raw_1.convert("RGB")
    with Image.open(image_2_path) as raw_2:
        image_2 = raw_2.convert("RGB")

    inputs = processor(text=[prompt], images=[image_1, image_2], return_tensors="pt", padding=True)

    # Pure inference: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    image_embeddings = outputs.image_embeds
    text_embeddings = outputs.text_embeds

    # The single text embedding is broadcast against the two image embeddings,
    # producing one similarity value per image.
    similarity = torch.nn.functional.cosine_similarity(image_embeddings, text_embeddings)

    result = {
        "image_1_score": similarity[0].item(),
        "image_2_score": similarity[1].item(),
    }

    return result

In [12]:
if __name__ == "__main__":
    # Compare every input image with its generated counterpart against one prompt.
    input_dir = "input"
    output_dir = "output"
    prompt = "Transform this image into a clean, sharp, modern, bold 2D logo design for a mountaineering company. Integrate sharp and minimalistic design elements, the whole image should be transformed into a traditional logo"

    # Only files with these extensions are evaluated, so stray files such as
    # .DS_Store or Thumbs.db neither break the count check nor crash the loader.
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tif", ".tiff")

    def _list_images(directory):
        """Return the sorted names of the image files located directly in `directory`."""
        return sorted(
            name
            for name in os.listdir(directory)
            if name.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(directory, name))
        )

    # Inputs and outputs are paired by their position in the sorted listings.
    input_images = _list_images(input_dir)
    output_images = _list_images(output_dir)

    if len(input_images) != len(output_images):
        print("Error: Mismatch in the number of input and output images.")
    else:
        for input_image, output_image in zip(input_images, output_images):
            input_image_path = os.path.join(input_dir, input_image)
            output_image_path = os.path.join(output_dir, output_image)

            scores = evaluate_images_with_clip(input_image_path, output_image_path, prompt)
            print(f"Input image: {input_image}, Score: {scores['image_1_score']}")
            print(f"Output image: {output_image}, Score: {scores['image_2_score']}")
            # Positive delta: the output image matches the prompt better than the input.
            print(f"Delta Score: {scores['image_2_score'] - scores['image_1_score']}")

Input image: input_1.jpg, Score: 0.2334834784269333
Output image: output_1.png, Score: 0.2928134500980377
Delta Score: 0.05932997167110443
