In [1]:
from IPython.display import display
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

import yaml
import torch
import os

from PIL import Image

def load_config(config_path,config_name):
    """Parse a YAML configuration file and return its contents.

    Args:
        config_path: Directory that contains the configuration file.
        config_name: Name of the configuration file inside ``config_path``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    full_path = os.path.join(config_path, config_name)
    with open(full_path) as stream:
        return yaml.safe_load(stream)

# Parse config.yaml from the parent directory; later cells read their settings from `config`.
config = load_config("../","config.yaml")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# -- Settings resolved from the config (most live under its "eval" section)
_eval_cfg = config["eval"]

MODEL_NAME = _eval_cfg["result_model_name"]
IMG_DATASET = config["image_dataset"]
# The image directory and result path are templates filled in with the values above.
IMG_DIR = _eval_cfg["img_dir"].format(image_dataset = IMG_DATASET)
RESULT_JSON = _eval_cfg["result_json"]
RESULT_PATH = _eval_cfg["result_dir"].format(model_name = MODEL_NAME, result_json = RESULT_JSON)

EVALUATOR_MODEL_PATH = _eval_cfg["evaluator_model_path"]

## TO BE DECIDED
# Paths of the prompt template files (all looked up before any file is read).
FACTOID_PROMPT_PATH = _eval_cfg["factoid_prompt"]
REASONING_PROMPT_PATH = _eval_cfg["reasoning_prompt"]
R0_PATH = _eval_cfg["r0"]
R1_PATH = _eval_cfg["r1"]
R2_PATH = _eval_cfg["r2"]
R3_PATH = _eval_cfg["r3"]
R4_PATH = _eval_cfg["r4"]


def _read_prompt(prompt_path):
    """Return the full text of the prompt template stored at ``prompt_path``."""
    with open(prompt_path,"r") as prompt_file:
        return prompt_file.read()


# Prompt templates, read in the same order as their paths are declared above.
FACTOID_EVAL_PROMPT = _read_prompt(FACTOID_PROMPT_PATH)
REASONING_EVAL_PROMPT = _read_prompt(REASONING_PROMPT_PATH)

R0_PROMPT = _read_prompt(R0_PATH)
R1_PROMPT = _read_prompt(R1_PATH)
R2_PROMPT = _read_prompt(R2_PATH)
R3_PROMPT = _read_prompt(R3_PATH)
R4_PROMPT = _read_prompt(R4_PATH)
#####

EVAL_RESULT_PATH = _eval_cfg["eval_result_path"]

In [3]:
def unpack_json(json_file_path):
    """Load and return the parsed contents of a JSON file.

    Failures are reported on stdout instead of being raised, so callers must
    check for a ``None`` result before using the data.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` if the file is missing, is not
        valid JSON, or any other error occurred while reading it.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: File '{json_file_path}' not found.")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in '{json_file_path}': {e}")
    except Exception as e:
        # Best-effort loader: any other failure is reported, not propagated.
        print(f"An unexpected error occurred: {e}")
    # Every failure path ends here; make the "no data" result explicit.
    return None


# `json` is imported only here, after `unpack_json` is defined; that works because
# the function looks the name up when it is called (below), not when it is defined.
import json
# NOTE(review): this hard-coded file name replaces the RESULT_PATH that the previous
# cell built from the config -- confirm the override is intended.
RESULT_PATH = "sdg_out.json"
# `res_json` is None when the file is missing or not valid JSON (unpack_json only prints the error).
res_json = unpack_json(RESULT_PATH)

In [4]:
def inference_llava(model, processor, prompt, img, max_new_tokens=500, do_sample=False, skip_special_tokens=True) -> str:
    """Ask a LLaVA model one question about one image and return its reply.

    The prompt is wrapped in a ``USER: <image> ... ASSISTANT:`` template, run
    through ``model.generate`` and decoded; only the text that follows the
    first ``ASSISTANT:`` marker of the decoded output is returned.

    Args:
        model: Model exposing ``generate`` and ``device``
            (e.g. ``LlavaForConditionalGeneration``).
        processor: Processor matching ``model``; called with ``text`` /
            ``images`` and used to decode the generated ids.
        prompt: Instruction text placed after the image placeholder.
        img: Image passed to the processor.
        max_new_tokens: Forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``.
        skip_special_tokens: Forwarded to ``processor.decode``.

    Returns:
        The decoded text that follows the ``ASSISTANT:`` marker.

    Raises:
        ValueError: If the decoded output does not contain ``ASSISTANT:``.
    """
    answer_marker = "ASSISTANT:"
    complete_prompt = f"USER: <image>\n{prompt}\n{answer_marker}"

    # Keyword arguments keep this call independent of the positional order of
    # `text` and `images`, which is not the same in every transformers release.
    inputs = processor(
        text = complete_prompt,
        images = img,
        return_tensors = 'pt'
    ).to(model.device, torch.float16)  # follow the model instead of assuming device 0

    raw_output = model.generate(
        **inputs,
        max_new_tokens = max_new_tokens,
        do_sample = do_sample
    )

    output = processor.decode(raw_output[0], skip_special_tokens = skip_special_tokens)
    answer = output[output.index(answer_marker) + len(answer_marker):]

    # Drop the single separator character after the marker, but only when it is
    # whitespace, so a reply that starts right after the colon keeps its first
    # character.
    if answer[:1].isspace():
        answer = answer[1:]

    return answer

def exec_time(to, tt) -> str:
    """Format the elapsed time between two instants as ``<h>h<m>m<s>s``.

    Args:
        to: Start instant.
        tt: End instant. ``tt - to`` must yield a ``datetime.timedelta``
            (e.g. both are ``datetime`` objects).

    Returns:
        The elapsed time with whole days folded into the hour count (a
        25-hour span is ``"25h0m0s"``). Fractions of a second are dropped,
        and a negative span is prefixed with ``-``.
    """
    # `timedelta.seconds` is only the within-day part (0..86399) and silently
    # loses whole days; `total_seconds()` covers the entire span.
    total_seconds = int((tt - to).total_seconds())
    sign = "-" if total_seconds < 0 else ""

    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)

    return f"{sign}{hours}h{minutes}m{seconds}s"

In [5]:
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Options for loading the evaluator checkpoint (half-precision weights).
evaluator_load_kwargs = {
    "torch_dtype": torch.float16,
    "low_cpu_mem_usage": True,
    # "load_in_4bit": True,
}

# Load the evaluator from EVALUATOR_MODEL_PATH, then move it to device index 0.
model = LlavaForConditionalGeneration.from_pretrained(EVALUATOR_MODEL_PATH, **evaluator_load_kwargs)
model = model.to(0)

# The processor is loaded from the same checkpoint path as the model.
processor = AutoProcessor.from_pretrained(EVALUATOR_MODEL_PATH)

Loading checkpoint shards: 100%|██████████| 6/6 [00:00<00:00,  9.01it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [224]:
from tqdm import tqdm

# Evaluator replies, collected as parallel lists (one entry per scored sample).
ids, factoid_scores, reasoning_scores = [], [], []
# Extra bookkeeping, filled only for entries whose image id contains "sample".
subject_ids, img_ids, times = [], [], []

# The five reasoning rubric prompts, always queried in this order.
rubric_prompts = [R0_PROMPT, R1_PROMPT, R2_PROMPT, R3_PROMPT, R4_PROMPT]

for res in tqdm(res_json):
    # -- Unpacking json
    res_id, img_id = res["id"], res["img_id"]
    subject_id, time = res["subject_id"], res["time"]
    question, short_answer, reasoned_answer = res["question"], res["short_answer"], res["reasoned_answer"]

    # -- Img validation: entries without an image file are reported and skipped
    image_path = os.path.join(IMG_DIR, f"{img_id}.jpg")
    if not os.path.isfile(image_path):
        print(f"Invalid image {img_id} file")
        continue
    raw_image = Image.open(image_path)

    # -- Inference process: one factoid query, then one query per rubric prompt
    factoid_reply = inference_llava(
        model, processor,
        FACTOID_EVAL_PROMPT.format(question = question, answer = short_answer),
        raw_image
    )
    rubric_replies = [
        inference_llava(
            model, processor,
            rubric_prompt.format(question = question, answer = short_answer, reason = reasoned_answer),
            raw_image
        )
        for rubric_prompt in rubric_prompts
    ]

    # -- Log result
    ids.append(res_id)
    if "sample" in img_id:
        subject_ids.append(subject_id)
        img_ids.append(img_id)
        times.append(time)
    factoid_scores.append(factoid_reply)
    # The five rubric replies are stored as a single ';'-separated string.
    reasoning_scores.append(';'.join(rubric_replies))


 10%|█         | 10/100 [00:14<02:09,  1.44s/it]

Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file
Invalid image sample_008 file


100%|██████████| 100/100 [02:12<00:00,  1.32s/it]


In [None]:
### NOTE: this is for sdg_out (presumably the cell below, which leaves out the subject_id / img_id / time bookkeeping) -- please fix!

In [6]:
from tqdm import tqdm

# Evaluator replies, collected as parallel lists (one entry per scored sample).
# Unlike the loop above, no subject / image / time bookkeeping is recorded here.
ids, factoid_scores, reasoning_scores = [], [], []

# Rubric prompts, queried in this fixed order for every sample.
rubric_templates = (R0_PROMPT, R1_PROMPT, R2_PROMPT, R3_PROMPT, R4_PROMPT)

for res in tqdm(res_json):
    # -- Unpacking json
    res_id = res["id"]
    img_id = res["img_id"]
    factoid_fields = {"question": res["question"], "answer": res["short_answer"]}
    rubric_fields = dict(factoid_fields, reason = res["reasoned_answer"])

    # -- Img validation: entries without an image file are reported and skipped
    candidate_path = os.path.join(IMG_DIR, f"{img_id}.jpg")
    image_found = os.path.isfile(candidate_path)
    if not image_found:
        print(f"Invalid image {img_id} file")
        continue
    raw_image = Image.open(candidate_path)

    # -- Inference process: factoid query first, then each rubric in turn
    factoid_prompt = FACTOID_EVAL_PROMPT.format(**factoid_fields)
    factoid_reply = inference_llava(model, processor, factoid_prompt, raw_image)

    rubric_replies = []
    for template in rubric_templates:
        rubric_prompt = template.format(**rubric_fields)
        rubric_replies.append(inference_llava(model, processor, rubric_prompt, raw_image))

    # -- Log result
    ids.append(res_id)
    factoid_scores.append(factoid_reply)
    # The five rubric replies are stored as a single ';'-separated string.
    reasoning_scores.append(';'.join(rubric_replies))


100%|██████████| 75/75 [01:58<00:00,  1.57s/it]


In [7]:
# One list per metric, filled in the same order as `ids`.
accs,logics,clears,details,irrels,plauss=[],[],[],[],[],[]

for sample_id, factoid_raw, reasoning_raw in zip(ids,factoid_scores, reasoning_scores):
    # Each reasoning entry must hold exactly five ';'-separated integers.
    logic, clear, detail, irrel, plaus = map(int, reasoning_raw.split(";"))
    accuracy = int(factoid_raw)

    accs.append(accuracy)
    logics.append(logic)
    clears.append(clear)
    details.append(detail)
    irrels.append(irrel)
    plauss.append(plaus)

# Keys of one output record, in the order they are written to the JSON file.
record_keys = ("id", "accuracy", "logic", "clarity", "detail", "irrelevance", "plausibility")

data = [
    dict(zip(record_keys, row))
    for row in zip(ids,accs,logics,clears,details,irrels,plauss)
]

# Persist the per-sample scores as a pretty-printed JSON list.
with open("sdg_eval.json", "w") as json_file:
    json.dump(data, json_file, indent=2)