In [50]:
import yaml
import torch
import os
import openai
from PIL import Image

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
from getpass import getpass

# Read the OpenAI key interactively (so it is not stored in the notebook source)
# and register the same value with the openai module.
openai.api_key = OPENAI_API_KEY = getpass()

def load_config(config_path, config_name):
    """Parse a YAML file and return its contents.

    Args:
        config_path: Directory that contains the config file.
        config_name: File name of the config inside ``config_path``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    full_path = os.path.join(config_path, config_name)
    with open(full_path) as handle:
        return yaml.safe_load(handle)

# Project configuration, read from config.yaml one directory above the notebook.
config = load_config(config_path="../", config_name="config.yaml")


 ···················································


In [68]:
# Where the graded model outputs live, as configured for the LLaVA evaluation.
RESULT_JSON_PATH = config["eval"]["llava"]["json_path"]

# Paths of the three evaluation prompt templates listed in the config.
_eval_prompt_paths = config["prompt"]["eval"]
FACTOID_PROMPT_PATH = _eval_prompt_paths["factoid"]
REASONING_PROMPT_PATH = _eval_prompt_paths["reasoning"]
COMPLETE_PROMPT_PATH = _eval_prompt_paths["complete"]

# Read each template file fully into a string constant.
with open(FACTOID_PROMPT_PATH) as prompt_file:
    FACTOID_PROMPT_TEMPLATE = prompt_file.read()

with open(REASONING_PROMPT_PATH) as prompt_file:
    REASONING_PROMPT_TEMPLATE = prompt_file.read()

with open(COMPLETE_PROMPT_PATH) as prompt_file:
    COMPLETE_PROMPT_TEMPLATE = prompt_file.read()

In [69]:
import json

def unpack_json(json_file_path):
    """Load a JSON file and return the decoded object.

    This is a best-effort loader: any failure is reported with ``print`` and
    the function returns ``None`` rather than raising.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` if the file is missing, is not
        valid JSON, or another ``Exception`` occurs while reading it.
    """
    try:
        with open(json_file_path, 'r') as handle:
            parsed = json.load(handle)
    except FileNotFoundError:
        print(f"Error: File '{json_file_path}' not found.")
    except json.JSONDecodeError as err:
        print(f"Error decoding JSON in '{json_file_path}': {err}")
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
    else:
        return parsed
    # Reached only after one of the handlers above has printed its message.
    return None

In [78]:
from tqdm.auto import tqdm
from langchain.callbacks import get_openai_callback
import time

# Load the model outputs that will be graded. unpack_json prints a message and
# returns None when the file cannot be read or parsed, so stop here with a clear
# error instead of failing later when the loop tries to iterate over None.
unpacked_results = unpack_json(RESULT_JSON_PATH+"LLaVa_3W_split_003.json")
if unpacked_results is None:
    raise RuntimeError(
        "No results loaded from " + RESULT_JSON_PATH + "LLaVa_3W_split_003.json"
    )

# Per-sample outputs, filled in the same order as the samples are processed.
ids, factoid_scores, reason_scores = [],[],[]
raw_scores = []  # only populated by the commented-out "complete" prompt below

chat = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)

# Prompt used to grade the short (factoid) answer.
fact_sys_template = (
    "You are a helpful evaluator that is responsible to grade answer of a question based on the description"
)
# Pass the template defined just above; the previous code referenced an
# undefined name `template`, which raises NameError on a fresh kernel.
fact_sys_prompt = SystemMessagePromptTemplate.from_template(fact_sys_template)
fact_human_prompt = HumanMessagePromptTemplate.from_template(FACTOID_PROMPT_TEMPLATE)
fact_prompt = ChatPromptTemplate.from_messages(
    [fact_sys_prompt, fact_human_prompt]
)

# Prompt used to grade the reasoned answer.
reason_sys_template = (
    "You are a helpful evaluator that is responsible to grade answer of a question based on the description"
)
reason_sys_prompt = SystemMessagePromptTemplate.from_template(reason_sys_template)
reason_human_prompt = HumanMessagePromptTemplate.from_template(REASONING_PROMPT_TEMPLATE)
reason_prompt = ChatPromptTemplate.from_messages(
    [reason_sys_prompt, reason_human_prompt]
)

# sys_template = (
#     "You are an NLP expert that is responsible to grade answer & reasoning of a question based on the description"
# )
# sys_prompt = SystemMessagePromptTemplate.from_template(sys_template)
# msg_prompt = HumanMessagePromptTemplate.from_template(COMPLETE_PROMPT_TEMPLATE)
# complete_prompt = ChatPromptTemplate.from_messages(
#     [sys_prompt, msg_prompt]
# )

# All chat calls run inside the OpenAI callback context so that `cb` can be
# written to the price log once the loop has finished.
with get_openai_callback() as cb:
    for sample in tqdm(unpacked_results):
        curr_id = sample["id"]
        curr_desc = sample["description"]
        curr_q = sample["question"]
        curr_sa = sample["short_answer"]
        curr_ra = sample["reasoned_answer"]
        
#         complete_res = chat(
#             complete_prompt.format_prompt(
#                 description = curr_desc,
#                 question = curr_q,
#                 answer = curr_sa,
#                 reason = curr_ra
#             ).to_messages()
#         )
        
        # Grade the short answer.
        # NOTE(review): assumes the factoid template file uses the
        # {description}, {question} and {answer} placeholders - confirm.
        sa_res = chat(
            fact_prompt.format_prompt(
                description = curr_desc,
                question = curr_q,
                answer = curr_sa
            ).to_messages()
        )

        # Grade the reasoned answer.
        # NOTE(review): assumes the reasoning template file uses the
        # {description}, {question} and {reason} placeholders - confirm.
        ra_res = chat(
            reason_prompt.format_prompt(
                description = curr_desc,
                question = curr_q,
                reason = curr_ra
            ).to_messages()
        )

        # Keep the raw model replies; they are stored here as returned.
        ids.append(curr_id)
#         raw_scores.append(complete_res.content)
        factoid_scores.append(sa_res.content)
        reason_scores.append(ra_res.content)
    
    # Save the callback's string form; "w" mode replaces any existing log.
    with open("price_log.txt", "w") as price_log_file:
        price_log_file.write(str(cb))


100%|██████████| 10/10 [00:25<00:00,  2.51s/it]


In [79]:
from datetime import datetime as dt

# Rewrite the price log with a timestamp header followed by the callback summary.
# `cb` is the callback object left over from the grading cell.
with open("price_log.txt", "w") as log_handle:
    log_handle.writelines([str(dt.now()) + "\n\n", str(cb)])

In [80]:
# One list per score column; position i in every list belongs to ids[i].
accs, logics, clears, details, irrels, plauss = ([] for _ in range(6))
_reason_columns = (logics, clears, details, irrels, plauss)

for _sample_id, factoid_raw, reasoning_raw in zip(ids, factoid_scores, reason_scores):
    # The reasoning reply is split on ";" and must yield exactly five integers
    # (logic, clarity, detail, irrelevance, plausibility); otherwise the
    # unpacking raises ValueError.
    logic, clear, detail, irrel, plaus = map(int, reasoning_raw.split(";"))
    accs.append(int(factoid_raw))
    for column, value in zip(_reason_columns, (logic, clear, detail, irrel, plaus)):
        column.append(value)

# Assemble one record per sample, with keys in the order they are written out.
_record_keys = (
    "id",
    "accuracy",
    "logic",
    "clarity",
    "detail",
    "irrelevance",
    "plausibility",
)
data = [
    dict(zip(_record_keys, row))
    for row in zip(ids, accs, logics, clears, details, irrels, plauss)
]

# Write the records to disk as indented JSON.
with open("Eval_LLaVa_3W_split_003.json", 'w') as out_handle:
    json.dump(data, out_handle, indent=2)