In [15]:
import yaml
import torch
import os
import openai
import time
import json
from PIL import Image
from tqdm.auto import tqdm
from datetime import datetime as dt

from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
from getpass import getpass

OPENAI_API_KEY = getpass()
openai.api_key = OPENAI_API_KEY

 ············································


In [31]:
# -- Config Settings
def load_config(config_path,config_name):
    with open(os.path.join(config_path, config_name)) as file:
        config = yaml.safe_load(file)
    return config

config = load_config("../../","config.yaml")


# -- Prompt Path to be read
EVAL_PROMPT_PARENT_PATH = config["prompt"]["eval"]["parent"]
FACTOID_PROMPT_PATH = EVAL_PROMPT_PARENT_PATH + config["prompt"]["eval"]["factoid"]
REASONING_PROMPT_PATH = EVAL_PROMPT_PARENT_PATH + config["prompt"]["eval"]["reasoning"]
COMPLETE_PROMPT_PATH = EVAL_PROMPT_PARENT_PATH + config["prompt"]["eval"]["complete"]


# -- Prompt Reading
with open(FACTOID_PROMPT_PATH) as file:
    FACTOID_PROMPT_TEMPLATE = file.read()
with open(REASONING_PROMPT_PATH) as file:
    REASONING_PROMPT_TEMPLATE = file.read()
with open(COMPLETE_PROMPT_PATH) as file:
    COMPLETE_PROMPT_TEMPLATE = file.read()
    
# -- Result Path to be evaluated
JSON_PARENT= config["eval"]["sample"]["json_parent"]
JSON_FILENAME = config["eval"]["sample"]["json_filename"]
JSON_PATH = JSON_PARENT + JSON_FILENAME
EVAL_ID = JSON_FILENAME.split(".")[0]

In [32]:
def unpack_json(json_file_path):
    try:
        with open(json_file_path, 'r') as file:
            data = json.load(file)
        return data
    except FileNotFoundError:
        print(f"Error: File '{json_file_path}' not found.")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in '{json_file_path}': {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        
unpacked_results = unpack_json(JSON_PATH)

In [33]:
unpacked_results[0]

{'id': 1.0,
 'subject_id': 1.0,
 'img_id': 'sample_001',
 'time': 538.0,
 'question': 'What time does the photo is taken?',
 'short_answer': 'Approximately at afternoon around 1 PM',
 'reasoned_answer': 'Because it is very sunny and there is no existence of cloud'}

In [34]:
# -- Init
ids, factoid_scores, reason_scores = [],[],[]
if "sample" in EVAL_ID:
    subject_ids, img_ids, times = [],[],[]
raw_scores = []


# -- LLM & Prompt Prep
chat = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
fact_sys_template = (
    "You are a helpful evaluator that is responsible to grade answer of a question based on the description"
)
fact_sys_prompt = SystemMessagePromptTemplate.from_template(fact_sys_template)
fact_human_prompt = HumanMessagePromptTemplate.from_template(FACTOID_PROMPT_TEMPLATE)
fact_prompt = ChatPromptTemplate.from_messages(
    [fact_sys_prompt, fact_human_prompt]
)

reason_sys_template = (
    "You are a helpful evaluator that is responsible to grade answer of a question based on the description"
)
reason_sys_prompt = SystemMessagePromptTemplate.from_template(reason_sys_template)
reason_human_prompt = HumanMessagePromptTemplate.from_template(REASONING_PROMPT_TEMPLATE)
reason_prompt = ChatPromptTemplate.from_messages(
    [reason_sys_prompt, reason_human_prompt]
)

# sys_template = (
#     "You are an NLP expert that is responsible to grade answer & reasoning of a question based on the description"
# )
# sys_prompt = SystemMessagePromptTemplate.from_template(sys_template)
# msg_prompt = HumanMessagePromptTemplate.from_template(COMPLETE_PROMPT_TEMPLATE)
# complete_prompt = ChatPromptTemplate.from_messages(
#     [sys_prompt, msg_prompt]
# )



# -- Running Evaluation Callback
with get_openai_callback() as cb:
    for sample in tqdm(unpacked_results):
        curr_id = sample["id"]
        curr_q = sample["question"]
        curr_sa = sample["short_answer"]
        curr_ra = sample["reasoned_answer"]
        
        # Differentiate Sample surveys vs. LMM Eval
        if "sample" in EVAL_ID: 
            curr_subject_id = sample["subject_id"]
            curr_img_id = sample["img_id"]
            curr_time = sample["time"]
        else:
            curr_desc = sample["description"]
        
#         sa_res = chat(
#             fact_prompt.format_prompt(
#                 description = curr_desc,
#                 question = curr_q,
#                 answer = curr_sa
#             ).to_messages()
#         )

#         ra_res = chat(
#             reason_prompt.format_prompt(
#                 description = curr_desc,
#                 question = curr_q,
#                 reason = curr_ra
#             ).to_messages()
#         )
        
        sa_res = chat(
            fact_prompt.format_prompt(
                description = curr_desc,
                question = curr_q,
                answer = curr_sa
            ).to_messages()
        )
        
        ra_res = chat(
            reason_prompt.format_prompt(
                description = curr_desc,
                question = curr_q,
                reason = curr_ra
            ).to_messages()
        )

        ids.append(curr_id)
        if "sample" in EVAL_ID:
            subject_ids.append(curr_subject_id)
            img_ids.append(curr_img_id)
            times.append(curr_time)
        factoid_scores.append(sa_res.content)
        reason_scores.append(ra_res.content)
    
#     with open(f"./bill/{EVAL_ID}.txt", "w") as price_log_file:
#         price_log_file.write(str(cb))


  0%|          | 0/100 [00:00<?, ?it/s]


NameError: name 'curr_desc' is not defined

In [79]:
with open(f"./bill/{EVAL_ID}.txt", "w") as price_log_file:
    price_log_file.write(str(dt.now())+"\n\n")
    price_log_file.write(str(cb))

In [80]:
accs,logics,clears,details,irrels,plauss=[],[],[],[],[],[]

for ID, fs, rs in zip(ids,factoid_scores, reason_scores):
    logic, clear, detail, irrel, plaus = (int(s) for s in rs.split(";"))
    accs.append(int(fs))
    logics.append(logic)
    clears.append(clear)
    details.append(detail)
    irrels.append(irrel)
    plauss.append(plaus)
    

data = [
    {
        "id" : i,
        "accuracy": a,
        "logic" : l,
        "clarity" : c,
        "detail" : d,
        "irrelevance" : ir,
        "plausibility" : p
    }
    for i,a,l,c,d,ir,p in zip(ids,accs,logics,clears,details,irrels,plauss)
]

if "sample" in EVAL_ID:
    data = [
        {
            "id" : i,
            "subject_id" : si,
            "image_id" : ii,
            "time" : t,
            "accuracy": a,
            "logic" : l,
            "clarity" : c,
            "detail" : d,
            "irrelevance" : ir,
            "plausibility" : p
        }
        for i,si,ii,t,a,l,c,d,ir,p in zip(ids, subject_ids, img_ids, times,accs,logics,clears,details,irrels,plauss)
    ]

with open(f"{EVAL_id}.json", 'w') as json_file:
    json.dump(data, json_file, indent=2)