## GPT4o Reasoning

[Prompt Instruction](https://platform.openai.com/docs/guides/vision)

In [1]:
!pip install opencv-python -q
!pip install torchvision -q
!pip install torchmetrics -q
!pip install torchmetrics[image] -q
!pip install "torchmetrics[image]" -q
!pip install torch-fidelity -q
!pip install numpy -q
!pip install torchmetrics -q
!pip install torch -q
!pip install openai -q

In [18]:
import json
import pandas as pd
import re
from PIL import Image
import numpy as np
import cv2
from torchvision import transforms
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
import base64
from io import BytesIO
import base64
import requests
import openai
from openai import OpenAI

In [62]:
import os

# Model snapshot used for every rating request (also stored in the results table).
MODEL = "gpt-4o-2024-08-06"

# Never commit credentials to a notebook: read the key from the environment.
# If OPENAI_API_KEY is unset this is None, and constructing the OpenAI client
# will fail with an explicit error instead of silently using a baked-in key.
# NOTE(review): the secret key that used to be hardcoded on this line must be
# treated as leaked and revoked in the OpenAI dashboard.
API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# System message sent with every rating request (see `call_api`). It describes
# the text-guided image-editing task, the aspects to rate on a 1-10 scale, and
# the JSON object the model must answer with.
#
# NOTE(review): the wording is deliberately left untouched because any change
# alters the model input and therefore the collected scores. Known
# inconsistencies to resolve before the next evaluation run:
#   - it announces "three different aspects" but lists four items;
#   - it says to fill the "score" keys, yet the template keys are
#     alignment / visual_quality / consistency / overall;
#   - the template values are empty strings, so the model tends to return the
#     numbers as strings (e.g. '10') rather than as JSON numbers;
#   - "Think carefully about your ratings and." is a broken sentence.
system_prompt = """
You want to change the content of a specific area within an image. This technique is called text-guided image editing. 

It involves the following elements:
- Original Image: The starting image before any edits.
- Prompt: A text description that specifies the desired change in the original image.
- Mask: A defined area in the image where the change should occur according to the prompt.
- Edited Image: The final image after the desired change has been made.

Your task is to evaluate the quality of these edits by considering three different aspects. 
Each aspect should be rated on a scale from 1 to 10, where 1 indicates "very poor" and 10 means "excellent."

Aspects to Evaluate:
1. Prompt-Image Alignment: 
    - Objective: Assess how well the edited area aligns with the instructions provided in the text prompt.
    - Considerations: Verify if the desired changes are accurately implemented. Pay attention to details such as numbers, colors, and objects mentioned in the prompt.
2. Visual Quality: 
    - Objective: Evaluate the visual appeal of the edited area within the mask, focusing solely on the appearance of the new content within the masked area.
    - Considerations: Assess realism and aesthetics, including color accuracy and overall visual coherence.
3. Consistency Between Original Image and Edited Area: 
    - Objective: Measure how well the edit integrates with the original image.
    - Considerations: Examine consistency in style, lighting, logic, and spatial coherence between the edited area and the original image.
4. Overall Rating:
    - After evaluating each aspect individually, provide an overall rating of the entire edited image. Consider how you perceive and like the edit as a whole, how well it meets your expectations and integrates with the original image. 


Input and Output:
- Input: The evaluation will be based on the following items:
    - Original image
    - Text prompt
    - Image with a masked area
    - Edited image
- Output: Provide your ratings in the following JSON format. Fill "score" keys with numerical values.
{
  "alignment": "",
  "visual_quality": "",
  "consistency": "",
  "overall": ""
}

Additional Instructions:
- Careful justification: Think carefully about your ratings and. Avoid providing ratings without thoughtful consideration.
- Output: Do not include anything other than the JSON file in your response.

"""


def get_user_prompt(instruction):
    """Build the user-turn text that carries the edit instruction.

    Args:
        instruction: The text prompt that guided the image edit.

    Returns:
        A single sentence asking the model to evaluate the edit for that
        instruction. A trailing period is always appended.
    """
    user_prompt = f"Evaluate the quality given the following prompt: {instruction}."
    return user_prompt

In [None]:
client = OpenAI(api_key=API_KEY)

def call_api(prompt, img_original, img_mask, img_edited, instruction):
    """Ask the model to rate one edit and return its raw text reply.

    The request consists of the module-level `system_prompt` as system message
    and one user message holding the instruction text followed by three
    images, in this order: original, masked area, edited result.

    Args:
        prompt: Unused. Kept only so existing positional callers keep working;
            the system message always comes from `system_prompt`.
        img_original: Base64-encoded PNG of the original image.
        img_mask: Base64-encoded PNG showing the masked area.
        img_edited: Base64-encoded PNG of the edited image.
        instruction: Edit instruction, embedded via `get_user_prompt`.

    Returns:
        The content string of the first completion choice (expected to be a
        JSON object, but returned unparsed).
    """
    def _png_part(encoded_png):
        # Every image in this notebook is produced by `encode_image`, which
        # always writes PNG, so the data URL must declare image/png. The
        # previous code labelled the mask and edited images as image/jpeg.
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{encoded_png}"
            },
        }

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": get_user_prompt(instruction)
                    },
                    _png_part(img_original),
                    _png_part(img_mask),
                    _png_part(img_edited),
                ]
            }
        ],
        max_tokens=300,
    )
    return response.choices[0].message.content

In [3]:
# Human ratings, one row per (id, turn) pair; the file is semicolon-separated.
# The scoring loop further down reads the columns `id`, `turn` and
# `human_rating_binary` from this frame.
labels = pd.read_csv("labels.csv", sep=";")

In [4]:
labels.head(2)

Unnamed: 0,id,turn,human_rating_binary
0,49,2,0
1,49,3,0


In [67]:
# Empty results table that the scoring loop fills with one row per rated edit.
# Re-running this cell replaces `df_gpt` with a fresh empty frame, i.e. it
# discards every score collected so far in this kernel session.
columns = ['model', 'id', 'turn', 'alignment', 'visual_quality', 'consistency', 'overall']
df_gpt = pd.DataFrame(columns=columns)

In [2]:

def encode_image(image):
    """Serialize a PIL image as PNG and return it as a base64 text string.

    Args:
        image: The `PIL.Image.Image` instance to encode.

    Returns:
        The PNG bytes of `image`, base64-encoded and decoded to a UTF-8 `str`.

    Raises:
        ValueError: If `image` is not a PIL image.
    """
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL Image.")

    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")  # or any other supported format
    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode('utf-8')

'''
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
'''

'\ndef encode_image(image_path):\n    with open(image_path, "rb") as image_file:\n        return base64.b64encode(image_file.read()).decode(\'utf-8\')\n'

In [6]:
# Load the list of edit turns. Each entry is read later for its "output",
# "input", "mask" and "instruction" fields.
# JSON is UTF-8 by specification; pass the encoding explicitly so that
# non-ASCII instructions are not decoded with the platform's locale default.
with open('edit_turns.json', encoding='utf-8') as f:
    turns = json.load(f)

In [100]:
# This bare expression is not the last statement of the cell, so it displays nothing.
labels
# Rewrite label id 200558 to 100558, in place on the frame loaded earlier.
# NOTE(review): presumably this aligns the label with the id used in the image
# file names of edit_turns.json — confirm, and consider fixing labels.csv
# itself so the correction does not depend on this cell being run.
labels.loc[labels["id"] == 200558, "id"] = 100558
# Displayed as the cell output (after the correction above).
labels

Unnamed: 0,id,turn,human_rating_binary
0,49,2,0
1,49,3,0
2,5434,3,0
3,43425,2,0
4,54492,2,0
...,...,...,...
73,194956,2,1
74,203920,1,1
75,209923,3,1
76,211860,1,1


In [101]:
# Output file names contain "<id>-output<turn>"; both numbers are extracted.
pattern = r'(\d+)-output(\d+)'

# Folder holding the image files for each value of `human_rating_binary`.
sample_dirs = {0: "bad_samples", 1: "good_samples"}


def _parse_scores(raw):
    """Return the model reply as a dict, or None if it is not a JSON object.

    A reply wrapped in a Markdown code fence (```json ... ```) is unwrapped
    before parsing, since chat models frequently add one.
    """
    text = (raw or "").strip()
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


# Index the edit turns once by (id, turn) instead of re-scanning the whole
# list with the regex for every label row. `setdefault` keeps the first entry
# for a key, which is the one the nested-loop version would have processed.
edits_by_key = {}
for entry in turns:
    match = re.search(pattern, entry["output"])
    if match:
        edits_by_key.setdefault(
            (int(match.group(1)), int(match.group(2))),
            (match.group(1), match.group(2), entry),
        )

# (id, turn) pairs that already have a row in df_gpt, so re-running this cell
# does not pay for the same API call twice.
scored = {(int(i), int(t)) for i, t in zip(df_gpt["id"], df_gpt["turn"])}

new_rows = []
try:
    for _, row in labels.iterrows():
        path = sample_dirs.get(row["human_rating_binary"])
        if path is None:
            # Previously execution continued with an undefined or stale `path`.
            print("Check Binary human rating, neither 0 nor 1!")
            continue

        key = (int(row["id"]), int(row["turn"]))
        if key in scored or key not in edits_by_key:
            continue

        found_id, found_turn, entry = edits_by_key[key]
        instruction = entry["instruction"]
        output_file = entry["output"]
        input_file = entry["input"]
        mask_file = entry["mask"]

        # Context managers close the underlying files; everything that needs
        # the pixel data therefore happens inside the `with` block.
        with Image.open(fr'{path}/{output_file}') as output_image, \
                Image.open(fr'{path}/{input_file}') as input_image, \
                Image.open(fr'{path}/{mask_file}') as mask_image:

            # Bring both operands to RGB: cv2.absdiff requires identical
            # shapes and would fail on e.g. an RGBA or greyscale output image.
            output_array = np.array(output_image.convert('RGB'))
            mask_array = np.array(mask_image.convert('RGB'))
            masked_area = cv2.absdiff(output_array, mask_array)

            # uint8 HxWx3 array -> PIL image directly; no need to round-trip
            # through a float tensor.
            masked_area_image = Image.fromarray(masked_area)

            input_image_encoded = encode_image(input_image)
            masked_area_encoded = encode_image(masked_area_image)
            output_image_encoded = encode_image(output_image)

        # The first argument (`prompt`) is ignored by call_api. The previous
        # code passed `api_key`, a name that does not exist on a fresh kernel.
        raw_response = call_api(
            system_prompt,
            input_image_encoded,
            masked_area_encoded,
            output_image_encoded,
            instruction
        )

        response = _parse_scores(raw_response)
        if response is None:
            # Skip without recording, so a later re-run retries this sample.
            print(f"Could not parse scores for id={found_id}, turn={found_turn}: {raw_response!r}")
            continue
        print(response)

        new_rows.append({
            "model": MODEL,
            "id": found_id,
            "turn": found_turn,
            "alignment": response.get("alignment", None),
            "visual_quality": response.get("visual_quality", None),
            "consistency": response.get("consistency", None),
            "overall": response.get("overall", None)
        })
        scored.add(key)
finally:
    # Append once instead of per row; running in `finally` keeps the scores
    # gathered so far even if an API call raises midway through the loop.
    if new_rows:
        df_gpt = pd.concat(
            [df_gpt, pd.DataFrame(new_rows, columns=df_gpt.columns)],
            ignore_index=True,
        )
                    

{'alignment': '10', 'visual_quality': '9', 'consistency': '9', 'overall': '9'}


In [102]:
# Persist the collected scores. The frame's integer index is written as an
# unnamed first column (pandas default `index=True`).
df_gpt.to_csv("gpt_scores.csv")