In [None]:
import os
import json
import base64
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if one exists) into the process
# environment, so that os.getenv("OPENAI_API_KEY") can find the key.
load_dotenv()

In [2]:
# --- Configuration -----------------------------------------------------------
# API credentials are read from the environment; never hard-code the key here.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_BASE_URL = "https://api.openai.com/v1"

# Timestamped scratch directory for the generated .jsonl request files.
# The time part uses '.' instead of ':' because ':' is not a legal character
# in file names on Windows.
WORKING_DIRECTORY_BASE_NAME = f"tmp_{datetime.now().strftime('%d.%m.%Y-%H.%M.%S')}"

# Placeholders: set these to the image folder and the dataset JSON file.
IMAGE_BASE_PATH = "Path of images"
DATASET_PATH = "Path of dataset"

# exist_ok=True keeps the cell re-runnable (os.mkdir raised FileExistsError
# when the cell was executed twice within the same second).
os.makedirs(WORKING_DIRECTORY_BASE_NAME, exist_ok=True)

In [3]:
# API client bound to the configured endpoint and key.
client = OpenAI(
    base_url=OPENAI_BASE_URL,
    api_key=OPENAI_API_KEY,
)

# Dataset rows; later cells read the 'image' and 'object_of_interest' columns.
df = pd.read_json(path_or_buf=DATASET_PATH)

In [4]:
def line_template_filler(id, question, image_bin, model="gpt-4o-2024-08-06",
                         mime_type="image/png", max_tokens=256):
    """Build one request record for a Batch API ``.jsonl`` input file.

    The record asks a chat-completions model a question about a single image
    and constrains the reply to a JSON object of the form ``{"count": <int>}``.

    Parameters
    ----------
    id : any
        Identifier for the request; it is stringified into ``custom_id`` so the
        response can be matched back to its source row. (The name shadows the
        ``id`` builtin but is kept because callers pass it by keyword.)
    question : str
        Text prompt sent alongside the image.
    image_bin : bytes
        Raw image bytes; they are base64-encoded into a ``data:`` URL.
    model : str, optional
        Model name placed in the request body.
    mime_type : str, optional
        MIME type declared in the ``data:`` URL. Pass e.g. ``"image/jpeg"``
        when the bytes are not PNG.
    max_tokens : int, optional
        Upper bound on generated tokens for the reply.

    Returns
    -------
    dict
        A JSON-serialisable request record (one line of the batch file).
    """
    encoded_image = base64.b64encode(image_bin).decode('utf-8')

    # Strict schema: the reply must be exactly {"count": <integer>}.
    count_schema = {
        "type": "object",
        "properties": {
            "count": {
                "type": "integer"
            }
        },
        "additionalProperties": False,
        "required": [
            "count"
        ]
    }

    return {
        "custom_id": f"{id}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{encoded_image}"
                            }
                        },
                        {
                            "type": "text",
                            "text": question
                        }
                    ]
                }
            ],
            "temperature": 1,
            "max_tokens": max_tokens,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "object_counter",
                    "strict": True,
                    "schema": count_schema
                }
            }
        }
    }

In [None]:
# Base name of the generated request file and the row selection to submit.
filename = "DATASET NAME"
query = "0 <= index < 1100"

# Build the path once so writing and uploading always refer to the same file.
request_path = f"{WORKING_DIRECTORY_BASE_NAME}/{filename}.jsonl"

# Write one request per selected row; the row's index label becomes custom_id.
with open(request_path, 'w', encoding='utf-8') as request_file:
    for index, row in df.query(query).iterrows():
        with open(f"{IMAGE_BASE_PATH}/{row['image']}", 'rb') as f:
            image = f.read()
        # NOTE(review): "visibile" is a typo for "visible"; it is left unchanged
        # so the prompt stays identical to any batches already submitted.
        question = f"How many {row['object_of_interest']} are visibile in the image?"
        jsonl = line_template_filler(id=index, question=question, image_bin=image)
        request_file.write(json.dumps(jsonl))
        request_file.write("\n")

# Upload through a context manager so the file handle is closed afterwards
# (the handle was previously opened inline and never closed).
with open(request_path, "rb") as upload_file:
    batch_input_file = client.files.create(
        file=upload_file,
        purpose="batch"
    )

batch_input_file_id = batch_input_file.id

# Keep the created batch object so it can be inspected from later cells.
batch = client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)
# Last expression of the cell: still rendered as the cell output.
batch

# Results

In [1]:
import glob
import json
import numpy as np
import pandas as pd

In [2]:
# Placeholder: path of the JSON file that is loaded into `df` for evaluation.
DATASET_PATH = "Path of the results file"

df = pd.read_json(path_or_buf=DATASET_PATH)

In [None]:
def parse_batch_line(line):
    """Extract ``(custom_id, count)`` from one line of a batch output file.

    Returns ``None`` for a blank line. Returns ``(custom_id, None)`` when the
    record has no usable count: a missing/``null`` response, no choices, a
    ``null`` message content, or content that is not JSON with a ``count`` key.
    A line that is not valid JSON at all still raises, so that a corrupt file
    is not silently ignored.
    """
    line = line.strip()
    if not line:
        return None
    record = json.loads(line)
    custom_id = record.get('custom_id')
    try:
        content = record['response']['body']['choices'][0]['message']['content']
        return custom_id, json.loads(content)['count']
    except (KeyError, IndexError, TypeError, ValueError):
        # TypeError: a None along the path; ValueError: content is not valid JSON.
        return custom_id, None


llm_count = dict()   # custom_id (str) -> count returned by the model
failed_ids = []      # custom_ids whose response carried no usable count

# sorted() makes the file order deterministic, so when the same custom_id
# occurs in several files the same one always wins.
for file in sorted(glob.glob("output/*.jsonl")):
    with open(file, encoding='utf-8') as f:
        for line in f:
            parsed = parse_batch_line(line)
            if parsed is None:
                continue
            custom_id, count = parsed
            if count is None:
                failed_ids.append(custom_id)
            else:
                llm_count[custom_id] = count

if failed_ids:
    print(f"{len(failed_ids)} responses had no usable count: {failed_ids}")

In [6]:
# Start every row with no prediction; matching rows are filled in afterwards.
df['llm_count'] = [None for _ in range(len(df))]

In [7]:
# Copy each parsed count onto the row(s) whose index label equals the
# integer form of the response's custom_id; ids with no matching row are ignored.
for custom_id, count in llm_count.items():
    row_mask = df.index == int(custom_id)
    df.loc[row_mask, 'llm_count'] = count

In [8]:
# Write the frame to CSV; the index is not included in the file.
df.to_csv(path_or_buf="output.csv", index=False)

In [9]:
def calculate_metrics(df):
    """Print exact-match accuracy (EA), MAE and RMSE of ``llm_count`` vs ``answer``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``llm_count`` (predicted) and ``answer``
        (expected).

    Rows in which either value is missing or non-numeric are excluded from all
    three metrics and their number is reported. Previously a single ``None`` in
    ``llm_count`` raised ``TypeError`` in the MAE/RMSE lines. When every row is
    usable the printed values are the same as before. If no row is usable, a
    message is printed instead of dividing by zero.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Coerce once to float arrays; anything non-numeric (None, NA, text) becomes NaN.
    predicted = pd.to_numeric(df['llm_count'], errors='coerce').to_numpy(dtype=float, na_value=np.nan)
    expected = pd.to_numeric(df['answer'], errors='coerce').to_numpy(dtype=float, na_value=np.nan)

    usable = ~(np.isnan(predicted) | np.isnan(expected))
    skipped = int((~usable).sum())
    if skipped:
        print(f"Skipped {skipped} of {len(df)} rows without a numeric llm_count/answer")
    if not usable.any():
        print("No rows to evaluate; EA, MAE and RMSE are undefined")
        return

    predicted = predicted[usable]
    expected = expected[usable]
    errors = predicted - expected

    print("EA", float((predicted == expected).mean()))
    print("MAE", float(np.abs(errors).mean()))
    print("RMSE", float(np.sqrt((errors ** 2).mean())))

In [None]:
# Print EA, MAE and RMSE for the llm_count column against the answer column.
calculate_metrics(df)