In [None]:

# Lookup from short model keys to model names; only the Qwen entries differ
# (they gain the "Qwen3" prefix).
# NOTE(review): not referenced by the other visible cells - presumably meant
# for display labels; confirm before relying on it.
MODEL_NAME_MAP = dict([
    ("Mistral", "Mistral"),
    ("LLaMA", "LLaMA"),
    ("Gemini", "Gemini"),
    ("Qwen-8B", "Qwen3-8B"),
    ("Qwen-14B", "Qwen3-14B"),
])


# Fig 2

In [None]:

import re
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.lines import Line2D

# Use the same tick-label font size on the x and y axes.
for _axis in ("xtick", "ytick"):
    plt.rcParams[f"{_axis}.labelsize"] = 19


# Language codes; each one is read as a column of the accuracy table below.
languages = ["en", "de", "fr", "es", "it", "pt", "ko", "ja"]

# Accuracy table with `model` and `benchmark` columns plus one column per language.
df = pd.read_csv("00.data/accuracy_le_model_FROM_99_FINAL.csv")


# Distinct model names present in the table.
available_models = df["model"].unique()

def find_models(pattern, models=None):
    """Return the model names that contain ``pattern``, sorted alphabetically.

    The match is a case-insensitive substring test.

    Args:
        pattern: Substring to look for.
        models: Optional iterable of candidate names. When omitted, the
            module-level ``available_models`` is searched (the original
            behaviour).

    Returns:
        A new sorted list of the matching names. Non-string entries (for
        example NaN coming from an empty ``model`` cell) are skipped instead
        of raising ``AttributeError`` on ``.lower()``.
    """
    candidates = available_models if models is None else models
    needle = pattern.lower()
    return sorted(
        name for name in candidates
        if isinstance(name, str) and needle in name.lower()
    )

def get_size(name):
    """Return the integer written directly before an uppercase ``B`` in ``name``.

    The first ``<digits>B`` occurrence wins (e.g. ``"Qwen-8B"`` gives 8).
    Names without such a token yield 0.
    """
    size_match = re.search(r'(\d+)B', name)
    if size_match is None:
        return 0
    return int(size_match.group(1))

# Column order of the figure: Mistral, LLaMA and Gemini matches (each group
# alphabetical), followed by the Qwen matches sorted by get_size().
qwen = sorted(find_models("Qwen"), key=get_size)
ordered_models = [
    name
    for family in ("Mistral", "LLaMA", "Gemini")
    for name in find_models(family)
] + qwen

# Panel titles, keyed by the model name found in the data. Models missing
# from this mapping fall back to their raw name via DISPLAY_NAME.get(model, model).
DISPLAY_NAME = {
    "Mistral": "Mistral",
    "LLaMA": "LLaMA",
    "Gemini": "Gemini",
    "Qwen-8B": "Qwen3-8B",
    # Was "Qwen-14B": the "Qwen3" prefix was missing, unlike the 8B entry.
    "Qwen-14B": "Qwen3-14B",
}

# One subplot column per entry in ordered_models.
n_cols = len(ordered_models)


# Short benchmark codes -> the labels stored in the `benchmark` column of the
# accuracy table.
BENCH = {
    "ORG": "Original",
    "SS":  "Shuffled QID",
    "PSS": "Shuffled QID by PID",
    "SO":  "Shuffled QSUB",
    # Was "Shuffled QID by PID" (a copy of PSS), which made every POS series
    # plot the PSS numbers. POS is the by-PID variant of the QSUB shuffle.
    "POS": "Shuffled QSUB by PID",
}

# Grid of 3 rows x one column per model.
# squeeze=False keeps `axes` two-dimensional even when n_cols == 1; without it
# a single-model run returns a 1-D array and `axes[row, col]` raises IndexError.
fig, axes = plt.subplots(
    nrows=3,
    ncols=n_cols,
    figsize=(4.8 * n_cols, 14),
    sharex=False,
    sharey=False,
    squeeze=False,
)
plt.subplots_adjust(wspace=0.18, hspace=0.35)


def lang_color(lang):
    """Return the marker colour for a language code.

    'ko' and 'ja' are drawn in 'tab:red'; every other code in 'tab:blue'.
    """
    if lang in ('ko', 'ja'):
        return 'tab:red'
    return 'tab:blue'

# Marker shape per shuffled benchmark: circle for PSS, diamond for POS.
shuffle_markers = dict(PSS='o', POS='D')

# Marker area passed to scatter() via `s`.
POINT_SIZE = 220

# Tick positions shared by both axes of every panel.
# NOTE(review): `ticks` was used by this cell but never defined in the
# notebook, so a fresh "Restart & Run All" stopped here with NameError. These
# values are chosen to fit the 0.2-1.02 axis limits - confirm they match the
# intended figure.
ticks = [0.2, 0.4, 0.6, 0.8, 1.0]


def _first_value(model_df, bench_key, lang):
    """Return the first `lang` value among the BENCH[bench_key] rows, or None if there are none."""
    matches = model_df[model_df['benchmark'] == BENCH[bench_key]][lang].values
    return matches[0] if len(matches) else None


def _draw_diagonal(ax):
    """Draw the dashed grey y = x reference line."""
    ax.plot([0, 1.05], [0, 1.05], ls='--', c='gray', alpha=0.6)


def _scatter_point(ax, x, y, marker_key, lang):
    """Plot one language's point with the marker of `marker_key` and the language colour."""
    ax.scatter(
        x, y,
        marker=shuffle_markers[marker_key],
        s=POINT_SIZE,
        color=lang_color(lang),
        edgecolors='white',
        linewidth=1.2
    )


def _finish_panel(ax, xlabel, ylabel, fontsize, **label_kwargs):
    """Apply the common limits, ticks, bold axis labels and dotted grid to a panel."""
    ax.set_xlim(0.2, 1.02)
    ax.set_ylim(0.2, 1.02)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xlabel(xlabel, fontsize=fontsize, fontweight="bold", **label_kwargs)
    ax.set_ylabel(ylabel, fontsize=fontsize, fontweight="bold", **label_kwargs)
    ax.grid(True, linestyle=':', alpha=0.6)


for col_idx, model in enumerate(ordered_models):
    model_df = df[df['model'] == model]
    # Only the left-most column carries y-axis labels.
    is_first_col = col_idx == 0

    # Row 0: SS (x) against PSS (y), one point per language.
    ax = axes[0, col_idx]
    _draw_diagonal(ax)
    for lang in languages:
        ss = _first_value(model_df, "SS", lang)
        pss = _first_value(model_df, "PSS", lang)
        if ss is not None and pss is not None:
            _scatter_point(ax, ss, pss, 'PSS', lang)
    _finish_panel(
        ax,
        "Accuracy (SS)",
        "Accuracy (PSS)" if is_first_col else "",
        fontsize=26,
    )

    # Row 1: SO (x) against POS (y), one point per language.
    ax = axes[1, col_idx]
    _draw_diagonal(ax)
    for lang in languages:
        so = _first_value(model_df, "SO", lang)
        pos = _first_value(model_df, "POS", lang)
        if so is not None and pos is not None:
            _scatter_point(ax, so, pos, 'POS', lang)
    _finish_panel(
        ax,
        "Accuracy (SO)",
        "Accuracy (POS)" if is_first_col else "",
        fontsize=24,
    )

    # Row 2: ORG (x) against both PSS and POS (y); languages without an ORG
    # value are skipped entirely.
    ax = axes[2, col_idx]
    _draw_diagonal(ax)
    for lang in languages:
        org = _first_value(model_df, "ORG", lang)
        if org is None:
            continue
        for label in ['PSS', 'POS']:
            shuffled = _first_value(model_df, label, lang)
            if shuffled is not None:
                _scatter_point(ax, org, shuffled, label, lang)
    _finish_panel(
        ax,
        "Accuracy (ORG)",
        "Accuracy" if is_first_col else "",
        fontsize=24,
        labelpad=4,
    )

    # Column title: display name if known, otherwise the raw model name.
    axes[0, col_idx].set_title(
        DISPLAY_NAME.get(model, model),
        fontsize=33,
        fontweight='bold'
    )



def _legend_marker(label, marker, line_color, face_color):
    """Build a legend handle showing a size-14 marker with the given colours."""
    return Line2D(
        [0], [0],
        marker=marker,
        color=line_color,
        label=label,
        markerfacecolor=face_color,
        markersize=14,
    )


# Colour key, an empty white entry used as a spacer, marker key, and the
# dashed reference line.
legend_elements = [
    _legend_marker('Western', 'o', 'w', 'tab:blue'),
    _legend_marker('Asian (KO, JA)', 'o', 'w', 'tab:red'),
    Line2D([0], [0], color='w', label=''),
    _legend_marker('PSS', 'o', 'gray', 'gray'),
    _legend_marker('POS', 'D', 'gray', 'gray'),
    Line2D([0], [0], color='gray', lw=2.0, ls='--', label='y = x'),
]

# Single-row, frameless legend anchored above the top edge of the figure.
legend_options = dict(
    loc='upper center',
    bbox_to_anchor=(0.5, 1.08),
    ncol=6,
    frameon=False,
    fontsize=35,
    columnspacing=1.4,
)
fig.legend(handles=legend_elements, **legend_options)

# Fit the subplots into the bottom 90% of the canvas (presumably to leave
# room for the figure-level legend - confirm).
plt.tight_layout(rect=[0, 0, 1, 0.90])

# Create the output folder first: savefig raises FileNotFoundError when the
# target directory does not exist (e.g. on a fresh checkout).
fig_path = Path("03.notebooks/0208_result/Fig2/figure2__0208_11.pdf")
fig_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(fig_path, dpi=300, bbox_inches="tight")
plt.show()


In [None]:
import pandas as pd

# Source file
src_path = "00.data/99.Accuracy.csv"

# Unify column names: MODEL/BENCH get descriptive names, the language
# columns are simply lower-cased.
column_renames = {"MODEL": "model", "BENCH": "benchmark"}
column_renames.update(
    {code: code.lower() for code in ("EN", "DE", "FR", "ES", "IT", "PT", "KO", "JA")}
)

df = pd.read_csv(src_path).rename(columns=column_renames)

print(df.head())


In [None]:
# Raw benchmark codes -> descriptive benchmark labels.
BENCH_MAP = dict([
    ("ORG", "Original"),
    ("SS", "Shuffled QID"),
    ("OS", "Shuffled QSUB"),
    ("PSS", "Shuffled QID by PID"),
    ("PSO", "Shuffled QSUB by PID"),
])

# Relabel the benchmark column, then remove unneeded rows such as TOTAL:
# codes missing from BENCH_MAP map to NaN and are dropped.
df = (
    df.assign(benchmark=df["benchmark"].map(BENCH_MAP))
      .dropna(subset=["benchmark"])
)


In [None]:
# Number of rows for each (model, benchmark) combination.
pair_sizes = df.groupby(["model", "benchmark"]).size()
print(pair_sizes)


In [None]:
# Every (model, benchmark) combination must occur exactly once.
group_sizes = df.groupby(["model", "benchmark"]).size()
assert (group_sizes == 1).all()
print("Figure 2 input sanity check: PASS")


In [None]:
import pandas as pd

# ===============================
# 1. Load 99.Accuracy file
# ===============================
df = pd.read_csv("00.data/99.Accuracy.csv")

# ===============================
# 2. Column name normalization
# ===============================
df = df.rename(columns={
    "BENCH": "benchmark",
    "MODEL": "model",
    "EN": "en",
    "DE": "de",
    "FR": "fr",
    "ES": "es",
    "IT": "it",
    "PT": "pt",
    "KO": "ko",
    "JA": "ja",
})

# ===============================
# 3. Benchmark mapping
# ===============================
BENCH_MAP = {
    "ORG": "Original",
    "SS": "Shuffled QID",
    "OS": "Shuffled QSUB",          # <- corresponds to SO
    "PSS": "Shuffled QID by PID",
    "PSO": "Shuffled QSUB by PID",
}

df["benchmark"] = df["benchmark"].map(BENCH_MAP)

# Remove unneeded rows such as TOTAL (codes missing from BENCH_MAP became NaN).
# .copy() makes the filtered frame independent of the original, so the column
# assignment in step 4 no longer triggers pandas' SettingWithCopyWarning.
df = df[~df["benchmark"].isna()].copy()

# ===============================
# 4. Percentage -> 0-1, rounded to 3 decimals
# ===============================
lang_cols = ["en", "de", "fr", "es", "it", "pt", "ko", "ja"]
df[lang_cols] = (df[lang_cols].astype(float) / 100.0).round(3)

# ===============================
# 5. Sanity check
# ===============================
print(df.head())
print(df.groupby(["model", "benchmark"]).size())

# ===============================
# 6. Save (Figure 2 ready)
# ===============================
# This is the CSV the Figure 2 cell reads, so this cell has to be run at least
# once before that one.
out_path = "00.data/accuracy_le_model_FROM_99_FINAL.csv"
df.to_csv(out_path, index=False)

print(f"Saved normalized file to: {out_path}")
