In [2]:
import pandas as pd
import re
import json
from rich import print

from utils import evaluate_ner, basic_post_processing


def parse_ner_output(generated_text, split_by="", allowed_labels=None):
    """Extract ``LABEL: text`` entity lines from a model generation.

    The text is split on the marker ``"### Entities:" + split_by`` and only
    the segment right after the first occurrence of the marker is parsed.
    Each line of that segment that looks like ``LABEL: entity text`` (label
    made of uppercase ASCII letters) and whose label is allowed becomes one
    ``{"label": ..., "text": ...}`` record.

    Args:
        generated_text: Raw generation. Non-string values (e.g. the NaN that
            pandas yields for an empty CSV cell) are treated as "no entities".
        split_by: Extra suffix expected directly after ``"### Entities:"``.
        allowed_labels: Collection of accepted labels. Defaults to the
            module-level ``POSSIBLE_ENTITIES``.

    Returns:
        A JSON string encoding the list of entity records. The string is
        ``"[]"`` when the marker is missing, the input is not a string, or no
        line yields an accepted entity, so the return type is always ``str``.
    """
    empty_result = json.dumps([])

    # Missing cells read from CSV arrive as float NaN, which has no .split().
    if not isinstance(generated_text, str):
        return empty_result

    segments = generated_text.split("### Entities:" + split_by)
    if len(segments) < 2:
        # Marker absent: return the same type as the normal path (JSON string).
        return empty_result

    # Resolved lazily so the global only has to exist at call time.
    if allowed_labels is None:
        allowed_labels = POSSIBLE_ENTITIES

    # Only the segment following the first marker is considered.
    after_entities = segments[1].strip()

    entities = []
    for line in after_entities.split("\n"):
        match = re.match(r"^([A-Z]+):\s*(.*)", line.strip())
        if match is None:
            continue
        label, entity_text = match.groups()
        if label in allowed_labels:
            entities.append({"label": label, "text": entity_text})

    # ensure_ascii=False keeps non-ASCII entity text readable in the output.
    return json.dumps(entities, ensure_ascii=False)


# Labels accepted by parse_ner_output; lines with any other label are dropped.
POSSIBLE_ENTITIES = [
    "PERS",
    "DOC",
    "QUANT",
    "ART",
    "TIME",
    "JOB",
    "MISC",
    "PCT",
    "ORG",
    "LOC",
    "PERIOD",
    "MON",
    "DATE",
]

# Directory prefix prepended to every result file name read below.
BASE_PATH = "../results/sft/"

# Test split; its "pred" column is overwritten for each evaluated result file.
# NOTE(review): presumably also carries the gold annotations used by
# evaluate_ner - confirm in utils.
data_df = pd.read_csv("../data/test.csv")

In [2]:
files_to_eval = [
    "gemma-2-27b-it_16_ex.csv",
    "gemma-2-27b-it_32_ex.csv",
    "gemma-2-27b-it_64_ex.csv",
    "gemma-3-27b-it_16_ex.csv",
    "gemma-3-27b-it_32_ex.csv",
    "gemma-3-27b-it_64_ex.csv",
    "Qwen2.5-14B-Instruct_16_ex.csv",
    "Qwen2.5-14B-Instruct_32_ex.csv",
    "Qwen2.5-14B-Instruct_64_ex.csv",
]

phi_to_eval = [
    "Phi-4_16_ex.csv",
    "Phi-4_32_ex.csv",
    "Phi-4_64_ex.csv",
]

# Pair each result file with the suffix that follows "### Entities:" in its
# generations: empty for the first group, "assistant" for the Phi-4 files.
eval_runs = [(name, "") for name in files_to_eval]
eval_runs += [(name, "assistant") for name in phi_to_eval]

# Overall entity F-score ("ents_f") per result file, in evaluation order.
res = {}
for f_name, split_by in eval_runs:
    df = pd.read_csv(BASE_PATH + f_name)
    # Parse the raw generations, then let basic_post_processing rewrite the
    # predictions row by row (it receives the whole row of data_df).
    data_df["pred"] = df["generated_answer"].map(
        lambda text: parse_ner_output(text, split_by)
    )
    data_df["pred"] = data_df.apply(basic_post_processing, axis=1)
    res[f_name] = evaluate_ner(data_df)["ents_f"]

print(json.dumps(res, indent=2))

In [6]:
best_files_to_eval = [
    "Qwen2.5-14B-Instruct_Best.csv",
    "gemma-2-27b-it_Best.csv",
    "gemma-3-27b-it_Best.csv",
]

# Pair each result file with the suffix that follows "### Entities:" in its
# generations; the Phi-4 file is parsed with the "assistant" suffix.
best_runs = [(name, "") for name in best_files_to_eval]
best_runs.append(("Phi-4_Best.csv", "assistant"))

# Full evaluate_ner result per file, so per-type scores stay available after
# the loop instead of only being printed.
best_res = {}
for f_name, split_by in best_runs:
    df = pd.read_csv(BASE_PATH + f_name)
    data_df["pred"] = df["generated_answer"].map(
        lambda text: parse_ner_output(text, split_by)
    )
    data_df["pred"] = data_df.apply(basic_post_processing, axis=1)

    print(f_name)
    # Kept under its own name so the `res` summary dict built for the
    # few-shot runs is not overwritten by a single evaluation result.
    eval_result = evaluate_ner(data_df)
    best_res[f_name] = eval_result

    print(eval_result["ents_f"])
    print([{k: round(v["f"], 3)} for k, v in eval_result["ents_per_type"].items()])