In [1]:
import os
import json
import pandas as pd
from sklearn.metrics import accuracy_score

def compute_accuracy_per_file(input_data):
    """Score a list of prediction records against their gold labels.

    For every record the values stored under ``"True Label"`` and
    ``"model_answer_number"`` are read with ``.get``, converted to ``str`` and
    stripped of surrounding whitespace before being compared, so ``1`` and
    ``" 1 "`` are treated as the same answer.

    A record whose label or answer is missing (``None``) still counts towards
    ``total`` but is never counted as correct; comparing the stringified values
    directly would make two missing values ("None" == "None") look like a hit.

    Args:
        input_data: Iterable of dict-like records, or ``None`` / empty.

    Returns:
        dict with
          * ``"accuracy"``: percentage of matching records rounded to two
            decimals, or NaN when there are no records (NaN is skipped by
            pandas aggregations, so an empty entry does not drag means down),
          * ``"correct"``: number of matching records,
          * ``"total"``: number of records seen.
    """
    correct = 0
    total = 0

    for item in input_data or ():
        total += 1
        true_label = item.get("True Label")
        model_answer = item.get("model_answer_number")
        if true_label is None or model_answer is None:
            # Unanswered / unlabeled record: counted, but cannot be a match.
            continue
        if str(true_label).strip() == str(model_answer).strip():
            correct += 1

    # Exact-match accuracy is simply correct / total; no external metric
    # library is needed for that.
    accuracy = round(correct / total * 100, 2) if total else float("nan")

    return {
        "accuracy": accuracy,
        "correct": correct,
        "total": total
    }

In [2]:
import json
import re
import numpy as np
from pathlib import Path

# Sub-folder of a prediction run to inspect. Exactly one value is active;
# the other folder names used so far are kept here for quick switching:
# "['no_context']"
# "['birth']"
# "['Nationality']"
# "['Summary']"
# "['birth', 'Nationality']"
# "['birth', 'Summary']"
# "['Nationality', 'Summary']"
folder_name = "['birth', 'Nationality', 'Summary']"


# Model whose predictions are inspected. Alternatives:
# "EXAONE-3.5-7.8B-Instruct"
# "EXAONE-3.0-7.8B-Instruct"
# "Llama-3.1-8B-Instruct"
# "Mistral-Nemo-Instruct-2407"
# "gpt-4o"
# "gpt-3.5-turbo-0125"
model_name = "EXAONE-3.5-7.8B-Instruct"
# Only used by the "test_data_sample_{mode}" path variant listed below.
mode = "version3"

# Directory holding the prediction JSON files for the selection above.
# Alternative locations:
# Path('../../data/prediction_data/gpt-4o') / folder_name
# Path(f'../../data/prediction_data/{model_name}_test_data_sample_{mode}') / folder_name
base_path = Path(f'../../data/prediction_data/{model_name}_test_data_fin_korea') / folder_name



In [39]:
def make_table(folder_name):
    """Write ``result.md`` with grouped accuracy / count tables for one folder.

    Steps:
      1. Load every ``*.json`` file in ``folder_name`` whose file name contains
         ``cultural``, ``cross``, ``fact`` or ``temporal``; that word is used as
         the question type. JSON files without such a word are ignored.
      2. Each loaded file is indexed as ``country -> character -> records`` and
         every record list is scored with ``compute_accuracy_per_file``.
      3. Per character and question type, accuracies are averaged and question
         counts summed (across countries).
      4. ``country``, ``history`` and ``time`` are attached from
         ``../../data/source_data/meta_character.json`` (path is relative to the
         current working directory).
      5. For characters whose ``time`` is ``'present'`` the cross / temporal
         columns are blanked out before aggregation.
      6. Accuracy and count tables grouped by Country, History, Time and
         History & Time are rendered as markdown and written to
         ``folder_name / 'result.md'``.

    Args:
        folder_name: Directory to process. Despite the name it must be a
            ``pathlib.Path``-like object (``.glob`` and ``/`` are used).

    Returns:
        None. The result is the written file plus a confirmation message.

    Raises:
        ValueError: If the folder contains no question-type JSON data to score.

    Note:
        ``DataFrame.to_markdown`` needs the optional ``tabulate`` package.
    """
    # The question type is taken from a keyword inside the JSON file name.
    question_type_pattern = re.compile(r'(cultural|cross|fact|temporal)')

    data_dict = {}
    for json_file in folder_name.glob('*.json'):
        match = question_type_pattern.search(json_file.name)
        if match is None:
            # Not a question-type file; skipping avoids an AttributeError on
            # ``match.group`` for unrelated JSON files in the same folder.
            continue
        with open(json_file, 'r', encoding='utf-8') as f:
            data_dict[match.group(1)] = json.load(f)

    # Character metadata: one row per (character, country) with the nested
    # info dict expanded into columns.
    character_info = pd.read_json('../../data/source_data/meta_character.json')

    character_info = character_info.reset_index().melt(
        id_vars='index',
        value_vars=['china', 'en', 'korea', 'mexico', 'spain'],
        var_name='country',
        value_name='info'
    ).dropna(subset=['info']).rename(columns={'index': 'character'}).reset_index(drop=True)

    character_info_expanded = pd.concat(
        [character_info.drop(columns=['info']), character_info['info'].apply(pd.Series)],
        axis=1
    )

    # question type -> country -> character -> {"accuracy", "correct", "total"}
    result_dict = {
        question_type: {
            country: {
                character: compute_accuracy_per_file(records)
                for character, records in characters.items()
            }
            for country, characters in countries.items()
        }
        for question_type, countries in data_dict.items()
    }

    per_group = {
        (q_type, country): characters
        for q_type, countries in result_dict.items()
        for country, characters in countries.items()
    }
    if not per_group:
        raise ValueError(f"No question-type prediction data found in {folder_name}")

    result_df = pd.DataFrame.from_dict(per_group, orient='index')
    result_df.index.names = ['Question Type', 'Country']

    # Long format: one row per (question type, country, character) that
    # actually has a score.
    per_character = result_df.reset_index().melt(
        id_vars=['Question Type', 'Country'],
        var_name='character_name',
        value_name='accuracy_info'
    ).dropna(subset=['accuracy_info']).copy()
    per_character[['accuracy', 'total']] = (
        per_character['accuracy_info'].apply(pd.Series)[['accuracy', 'total']]
    )

    agg = (
        per_character
        .groupby(['character_name', 'Question Type'])
        .agg(accuracy=('accuracy', 'mean'),
             total=('total', 'sum'))
        .unstack('Question Type')   # spread question types into columns
    )

    # Flatten (metric, question type) into "<type>_accuracy" / "<type>_num".
    agg.columns = [
        f"{qtype}_accuracy" if metric == 'accuracy' else f"{qtype}_num"
        for metric, qtype in agg.columns
    ]

    result = agg.reset_index()

    # Keep only characters that have values in every column other than the
    # cross / temporal ones.
    cols_to_check = [col for col in result.columns
                     if ('cross' not in col) and ('temporal' not in col)]

    filtered_result = result.dropna(subset=cols_to_check)
    info_cols = ['character', 'country', 'history', 'time']

    merged_df = (
        filtered_result
        .merge(
            character_info_expanded[info_cols],
            left_on='character_name',
            right_on='character',
            how='left'
        )
        .drop(columns='character')  # drop the duplicated key column
    )

    # Cross / temporal values are excluded for 'present' characters.
    cols_to_nan = [col for col in merged_df.columns if 'cross' in col or 'temporal' in col]
    if cols_to_nan:
        merged_df.loc[merged_df['time'] == 'present', cols_to_nan] = np.nan

    # Report only the question types that were actually loaded, so a folder
    # missing one of them does not fail with a KeyError.
    question_types = ('cross', 'cultural', 'fact', 'temporal')
    accuracy_cols = [f"{q}_accuracy" for q in question_types
                     if f"{q}_accuracy" in merged_df.columns]
    num_cols = [f"{q}_num" for q in question_types
                if f"{q}_num" in merged_df.columns]

    groupings = {
        'Country': ['country'],
        'History': ['history'],
        'Time': ['time'],
        'History & Time': ['history', 'time']
    }
    markdown_lines = []
    for name, cols in groupings.items():
        # ----- Accuracy Table -----
        acc = merged_df.groupby(cols)[accuracy_cols].mean().round(2)
        acc['row_avg'] = acc.mean(axis=1).round(2)
        acc.loc['col_avg'] = acc.mean().round(2)

        markdown_lines.append(f"### Accuracy by {name}\n")
        markdown_lines.append(acc.to_markdown())
        markdown_lines.append("\n")

        # ----- Num Table -----
        nums = merged_df.groupby(cols)[num_cols].sum()
        nums['row_sum'] = nums.sum(axis=1)
        nums.loc['col_sum'] = nums.sum()

        markdown_lines.append(f"### Num by {name}\n")
        markdown_lines.append(nums.to_markdown())
        markdown_lines.append("\n")

    # Write the tables to result.md next to the prediction files.
    output_path = folder_name / 'result.md'
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(markdown_lines))

    print(f"✅ 마크다운 파일을 생성했습니다: {output_path}")



In [40]:
# Single-model configuration: choose one model and derive the path of its
# prediction folder from the model name and the folder suffix below.
#
# Model names used so far:
#   "EXAONE-3.5-7.8B-Instruct"
#   "Qwen3-8B"
#   "Llama-3.1-8B-Instruct"
#   "Mistral-Nemo-Instruct-2407"
#   "gpt-4o"
#   "gpt-3.5-turbo-0125"
model_name = "EXAONE-3.5-7.8B-Instruct"

# Suffix appended to the model name to form the prediction folder name.
input_folder_description = "test_data_fin_korea"

input_folder_name = f"../../data/prediction_data/{model_name}_{input_folder_description}"


In [42]:
from pathlib import Path

# Batch driver: build result.md for every sub-folder of every model's
# prediction folder.
model_list = ["EXAONE-3.5-7.8B-Instruct", "Qwen3-8B", "Llama-3.1-8B-Instruct", "Mistral-Nemo-Instruct-2407", "gpt-4o", "gpt-3.5-turbo-0125"]

input_folder_description = "test_data_fin_korea"

for model_name in model_list:
    input_folder_name = f"../../data/prediction_data/{model_name}_{input_folder_description}"
    model_dir = Path(input_folder_name)

    if not model_dir.is_dir():
        # A model without predictions must not abort the remaining models.
        print(f"⚠️ Skipping {model_name}: prediction folder not found: {model_dir}")
        continue

    # iterdir() yields entries in arbitrary order; sort for a reproducible run log.
    folder_paths = sorted(folder for folder in model_dir.iterdir() if folder.is_dir())

    for base_path in folder_paths:
        make_table(base_path)

✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['birth', 'Nationality', 'Summary']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['birth']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['Summary']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['birth', 'Nationality']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['Nationality', 'Summary']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['Nationality']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['no_context']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/prediction_data/EXAONE-3.5-7.8B-Instruct_test_data_fin_korea/['birth', 'Summary']/result.md
✅ 마크다운 파일을 생성했습니다: ../../data/