<h1>
<hr style=" border:none; height:3px;">
<center>Evaluation pipeline</center>
<hr style=" border:none; height:3px;">
</h1>

<center><img src='https://netacad.centralesupelec.fr/img/cs.jpg' width=200></center>

<h4><center>Louis LHOTTE | Clément VERON | Edouard SEGUIER</center></h4>

In [10]:
# Imports
import os
import re

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# I - Evaluation

In [2]:
_DEFAULT_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

# Loaded (model, processor) pairs keyed by checkpoint name, so repeated
# evaluations reuse the same weights instead of reloading them on every call.
_CLIP_CACHE = {}


def _load_clip(model_name):
    """Return the cached (model, processor) pair for ``model_name``, loading it on first use."""
    if model_name not in _CLIP_CACHE:
        _CLIP_CACHE[model_name] = (
            CLIPModel.from_pretrained(model_name),
            CLIPProcessor.from_pretrained(model_name),
        )
    return _CLIP_CACHE[model_name]


def evaluate_images_with_clip(image_1_path, image_2_path, prompt, model_name=_DEFAULT_CLIP_CHECKPOINT):
    """
    Evaluate two images based on their relevance to a given text prompt using the CLIP metric.

    The score of an image is the cosine similarity between its CLIP image
    embedding and the CLIP text embedding of ``prompt``.

    Args:
        image_1_path (str): Path to the first image.
        image_2_path (str): Path to the second image.
        prompt (str): The text prompt for evaluation.
        model_name (str): CLIP checkpoint passed to ``from_pretrained``.
            Defaults to "openai/clip-vit-base-patch32". The model and
            processor are loaded once per checkpoint and then reused.

    Returns:
        dict: A dictionary with the keys "image_1_score" and "image_2_score",
        each holding the similarity (a Python float) of the corresponding image.
    """
    model, processor = _load_clip(model_name)

    # The context managers close the underlying files as soon as the pixel
    # data has been converted; the converted copies stay usable afterwards.
    with Image.open(image_1_path) as raw_1, Image.open(image_2_path) as raw_2:
        image_1 = raw_1.convert("RGB")
        image_2 = raw_2.convert("RGB")

    inputs = processor(text=[prompt], images=[image_1, image_2], return_tensors="pt", padding=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    image_embeddings = outputs.image_embeds
    text_embeddings = outputs.text_embeds

    # The single text embedding is broadcast against both image embeddings,
    # giving one similarity value per image.
    similarity = torch.nn.functional.cosine_similarity(image_embeddings, text_embeddings, dim=-1)

    return {
        "image_1_score": similarity[0].item(),
        "image_2_score": similarity[1].item(),
    }

In [15]:
def _natural_key(name):
    """Sort key that compares digit runs numerically, so "input_2" sorts before "input_10"."""
    # re.split with a capturing group alternates text and digit tokens, so the
    # same list position always holds the same type across file names.
    return [int(token) if token.isdecimal() else token for token in re.split(r"(\d+)", name)]


def _list_files(directory, suffix=None):
    """Return the regular, non-hidden files of ``directory`` in natural order, optionally filtered by ``suffix``."""
    return sorted(
        (
            name
            for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
            # Skip dotfiles (e.g. .DS_Store) so they are not mistaken for images or prompts.
            and not name.startswith(".")
            and (suffix is None or name.endswith(suffix))
        ),
        key=_natural_key,
    )


if __name__ == "__main__":
    # Score every (input image, output image, prompt) triple with CLIP and
    # print both scores together with their difference.
    input_dir = "input"
    output_dir = "output"
    prompt_dir = "Prompts"

    # Files are paired by position, so all three listings must share the same
    # numeric-aware ordering; a plain lexicographic sort would put "input_10"
    # before "input_2" and silently mispair images and prompts.
    input_images = _list_files(input_dir)
    output_images = _list_files(output_dir)
    prompt_files = _list_files(prompt_dir, suffix='.txt')

    if len(input_images) != len(output_images):
        print("Error: Mismatch in the number of input and output images.")
    elif len(input_images) != len(prompt_files):
        print("Error: Mismatch in the number of images and prompts.")
    else:
        for input_image, output_image, prompt_file in zip(input_images, output_images, prompt_files):
            input_image_path = os.path.join(input_dir, input_image)
            output_image_path = os.path.join(output_dir, output_image)
            prompt_path = os.path.join(prompt_dir, prompt_file)

            # Explicit encoding: the platform default differs between systems
            # and can garble non-ASCII prompts.
            with open(prompt_path, 'r', encoding='utf-8') as file:
                prompt = file.read().strip()

            scores = evaluate_images_with_clip(input_image_path, output_image_path, prompt)
            print(f"Input image: {input_image}, Score: {scores['image_1_score']:.2f}")
            print(f"Output image: {output_image}, Score: {scores['image_2_score']:.2f}")
            print(f"Delta Score: {scores['image_2_score'] - scores['image_1_score']:.2f}")
            print("=====")

Input image: input_1.jpg, Score: 0.23
Output image: output_1.png, Score: 0.29
Delta Score: 0.06
=====
Input image: input_2.jpg, Score: 0.18
Output image: output_2.png, Score: 0.28
Delta Score: 0.09
=====
Input image: input_3.jpg, Score: 0.18
Output image: output_3.png, Score: 0.23
Delta Score: 0.05
=====
Input image: input_4.png, Score: 0.28
Output image: output_4.png, Score: 0.28
Delta Score: -0.00
=====
Input image: input_5.jpg, Score: 0.28
Output image: output_5.png, Score: 0.30
Delta Score: 0.03
=====



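<div class="alert alert-block alert-info">CLIP measures how well an image matches a given text prompt (the cosine similarity between their embeddings). The delta score, i.e. the output image's score minus the input image's score, can therefore serve as an indicator of the pipeline's "effectiveness" or "performance": a positive delta means the output image matches the prompt better than the input image did.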
</div>