In [1]:
import json
# Locations of the evaluation data and of the single query examined in this notebook.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook anywhere else.
base_path = "/home/s4kibart/vision-retrieval/data/eval_all"
query_images = f"{base_path}/microtexts-retrieval-complex/srip"
query_name = "cap_rent_increases"

queries_texts = "/home/s4kibart/vision-retrieval/data/retrieval_queries/microtexts-retrieval-complex"
# The path gets its own name so that `query_text` only ever refers to the parsed
# JSON document (it used to hold the path first and the document afterwards).
query_text_path = f"{queries_texts}/{query_name}.json"
query_image = f"{query_images}/{query_name}.png"


# Read the query description as UTF-8 explicitly instead of relying on the
# platform's default text encoding.
with open(query_text_path, "r", encoding="utf-8") as f:
    query_text = json.load(f)

# Build {case name: value} from the first stored CBR evaluation. The case name is
# the second "/"-separated component of each key; every stored value v becomes 4 - v.
# NOTE(review): presumably the stored values are ranks 1..3 and `4 - v` flips them
# so that a larger number means "more relevant" - TODO confirm.
stored_ranking = query_text["userdata"]["cbrEvaluations"][0]["ranking"]
reference_rankings = {k.split("/")[1]: 4 - v for k, v in stored_ranking.items()}

In [2]:
# Display the mapping built in the previous cell (case name -> reference value).
reference_rankings

{'nodeset6369': 3,
 'nodeset6377': 1,
 'nodeset6384': 1,
 'nodeset6418': 1,
 'nodeset6455': 1,
 'nodeset6465': 2}

In [3]:
# One PNG path per candidate case, listed in the iteration order of
# `reference_rankings` (later cells rely on that order to map image numbers
# back to case names).
casebase_path = f"{base_path}/casebase/srip"
casebase_images = [
    f"{casebase_path}/{case_name}.png"
    for case_name in reference_rankings
]
casebase_images

['/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6369.png',
 '/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6377.png',
 '/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6384.png',
 '/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6418.png',
 '/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6455.png',
 '/home/s4kibart/vision-retrieval/data/eval_all/casebase/srip/nodeset6465.png']

In [4]:
from PIL import Image
import base64, io

# Prefix of a PNG data URI; image_to_base64 puts it in front of the base64 payload.
IMG_PLACEHOLDER = "data:image/png;base64,"

def image_to_base64(image: Image.Image) -> str:
    """Encode ``image`` as a PNG data-URI string.

    The image is written to an in-memory buffer in PNG format, the bytes are
    base64-encoded, and the result is returned with ``IMG_PLACEHOLDER`` in front.
    """
    png_stream = io.BytesIO()
    image.save(png_stream, format="PNG")
    payload = base64.b64encode(png_stream.getvalue()).decode("utf-8")
    return f"{IMG_PLACEHOLDER}{payload}"

def append_images(image) -> dict:
    """Build one user chat message that carries a single image.

    Despite its name, this function appends nothing: it returns the message and
    leaves it to the caller to add it to a message list.

    Args:
        image: Anything ``Image.open`` accepts; this notebook passes file paths.

    Returns:
        A dict with ``"role": "user"`` whose ``"content"`` is a one-element list
        holding an ``image_url`` entry with the image as a PNG data URI.
    """
    # Open inside a context manager so the underlying file is released as soon
    # as the pixels have been read. convert() returns a separate image object,
    # so it stays usable after the source has been closed.
    with Image.open(image) as source:
        rgb_image = source.convert("RGB")
    return {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": image_to_base64(rgb_image)}}
        ],
    }

In [5]:
# Conversation sent to the model: the system instruction, the textual task
# description, and then one user message per image.
prompt_messages = [
    {"role": "system", "content": "You are an assistant that identifies visualizations of argument graphs."},
    {"role": "user", "content": f"Take a look at the following images in space reclaiming icicle chart visualization. Image 1 represents a query, images 2-{1+len(reference_rankings.keys())} are retrieval candidates. Please rank the retrieval candidates in descending order based on their similarity to the query image. Use this format: number_with_highest_similarity, number_with_second_highest_similarity, ..."},
]
# The prompt calls the query "image 1" and the candidates "images 2..N+1", so the
# query image has to be attached first. Without it the first candidate would be
# read as the query and every number in the reply would be shifted by one.
prompt_messages.append(append_images(query_image))
for img in casebase_images:
    prompt_messages.append(append_images(img))

In [6]:
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()  # take environment variables from .env.

# No credentials are passed explicitly; the client is expected to pick them up
# from the environment populated above.
client = OpenAI()

# Ask the fine-tuned model to rank the candidates described in `prompt_messages`.
completion = client.chat.completions.create(
    model="ft:gpt-4o-2024-08-06:wi2-trier-university:srip-900x2:APyQkaBQ",
    messages=prompt_messages,
)
# The `OpenAI` client returns typed response objects, so the reply text is read
# as an attribute; subscripting the message with ["content"] raises a TypeError.
answer = completion.choices[0].message.content

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [6]:
# Hard-coded stand-in for the model reply, written in the format the prompt asks
# for. It binds `answer` so the parsing cell below can run even though the API
# request above did not return a response.
answer = "2, 3, 4, 5, 6, 7"

In [7]:
# Parse the comma-separated reply into image numbers, best match first.
# int() ignores the spaces that remain after splitting on ",".
order = [int(i) for i in answer.split(",")]
keys = list(reference_rankings.keys())
# Compare the length of the list itself, not only of its set: a reply such as
# "2, 2, 3, 4, 5, 6, 7" has the right set of numbers but repeats one of them and
# would otherwise silently overwrite an entry in the dict built below.
assert len(order) == len(set(order)) == len(reference_rankings), "Rankings must be unique and equal to the number of images"
# Image 1 is the query, so the candidates are numbered 2..N+1.
assert set(order) == set(range(2, len(reference_rankings)+2)), "Rankings must be a permutation of 2 to N+1"
# Turn rank positions into decreasing pseudo-similarities (0.9, 0.8, ...);
# `idx-2` maps an image number back to its position in `keys`.
# NOTE: the values reach 0 and below from the 10th position onwards.
simulated_sims = {keys[idx-2]: 1-.1*(pos+1) for pos, idx in enumerate(order)}
simulated_sims


{'nodeset6369': 0.9,
 'nodeset6377': 0.8,
 'nodeset6384': 0.7,
 'nodeset6418': 0.6,
 'nodeset6455': 0.5,
 'nodeset6465': 0.3999999999999999}

In [8]:
# Display the reference values again, next to the simulated similarities shown above.
reference_rankings

{'nodeset6369': 3,
 'nodeset6377': 1,
 'nodeset6384': 1,
 'nodeset6418': 1,
 'nodeset6455': 1,
 'nodeset6465': 2}

In [9]:
from correctness_completeness import _correctness_completeness_single
# Score the simulated similarities against the reference values. The call returns
# a 2-tuple - presumably (correctness, completeness), going by the helper's name;
# TODO confirm against correctness_completeness.py.
# NOTE(review): the meaning of the third argument (None) is not visible from here.
_correctness_completeness_single(reference_rankings, simulated_sims, None)

(0.1111111111111111, 1.0)