In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from sklearn.neighbors import KernelDensity
from statsmodels.nonparametric.kde import KDEUnivariate
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
import matplotlib.gridspec as gridspec
from matplotlib.ticker import MultipleLocator
import seaborn as sns


In [None]:
# Language codes used to build the per-language column names
# (e.g. "response_TF_<lang>") in the benchmark frames.
LANGUAGE_LIST = ['en', 'de', 'fr', 'es', 'it', 'pt', 'ko', 'ja']

# Root folder of the stored results and one sub-folder per evaluated model.
RESULT_DIR = '../02.results/00.Benchmark_Results/'
RESULT_NAME = [
    '00.Gemini/',
    '01.Llama/',
    '02.Mistral/',
    '03.Qwen/Qwen-8B/',
    '03.Qwen/Qwen-14B/',
]

# Parquet file of each benchmark variant; the same file names exist in every
# model folder. Index 0 is the original benchmark, 1-4 the shuffled variants.
BENCHMARK_DIR = '../02.results/00.Benchmark_Results/'
BENCHMARK_NAME = [
    '00.original_benchmark_TF_500.parquet',
    '01.subject_shuffled_benchmark_TF_500.parquet',
    '02.object_shuffled_benchmark_TF_500.parquet',
    '03.property_scoped_subject_shuffled_benchmark_TF_500.parquet',
    '04.property_scoped_object_shuffled_benchmark_TF_500.parquet',
]

# One list of DataFrames per model, read in the order of RESULT_NAME; inside
# each list the frames follow the order of BENCHMARK_NAME.
(
    BENCHMARK_GEMINI,
    BENCHMARK_LLAMA,
    BENCHMARK_MISTRAL,
    BENCHMARK_QWEN3_8B,
    BENCHMARK_QWEN3_14B,
) = (
    [pq.read_table(f"{RESULT_DIR}{model_dir}{name}").to_pandas() for name in BENCHMARK_NAME]
    for model_dir in RESULT_NAME
)

# NOTE: the order below differs from RESULT_NAME on purpose; MODEL_ORDER holds
# the display name for each position of BENCHMARK_RESULTS.
BENCHMARK_RESULTS = [
    BENCHMARK_MISTRAL,
    BENCHMARK_LLAMA,
    BENCHMARK_GEMINI,
    BENCHMARK_QWEN3_8B,
    BENCHMARK_QWEN3_14B,
]
MODEL_ORDER = ["Mistral", "LLaMA", "Gemini", "Qwen3-8B", "Qwen3-14B"]


In [None]:
def is_correct(df, languages=None):
    """Attach one integer ``correct_<lang>`` column per language to ``df``.

    Every value of ``response_TF_<lang>`` is turned into a code:

    * a "True" answer (bare, or wrapped as ``<answer>True</answer>``) becomes
      0 when the first row of ``df`` has ``kind == 'original'`` and 1 otherwise;
    * a "False" answer (bare or wrapped) receives the opposite code;
    * any other value becomes 2.

    NOTE(review): this reads as 0 = correct, 1 = wrong, 2 = unsure, with
    'original' statements expected to be true and every other kind expected
    to be false -- confirm against the benchmark definition.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``kind`` column and one ``response_TF_<lang>`` column per
        language. Only the first row's ``kind`` is inspected. The frame is
        modified in place.
    languages : iterable of str, optional
        Language codes to process. Defaults to the module-level
        ``LANGUAGE_LIST``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns added.

    Raises
    ------
    KeyError
        If ``kind`` (for a non-empty frame) or a ``response_TF_<lang>`` column
        is missing.
    """
    if languages is None:
        languages = LANGUAGE_LIST

    # An empty frame has no first row to read `kind` from; there is nothing to
    # label either, so the choice of codes is irrelevant in that case.
    is_original = len(df) > 0 and df.iloc[0]['kind'] == 'original'
    if is_original:
        code_for_true, code_for_false = 0, 1
    else:
        code_for_true, code_for_false = 1, 0
    code_for_other = 2

    true_answers = ("True", "<answer>True</answer>")
    false_answers = ("False", "<answer>False</answer>")

    for lang in languages:
        codes = []
        for response in df[f"response_TF_{lang}"].tolist():
            if response in true_answers:
                codes.append(code_for_true)
            elif response in false_answers:
                codes.append(code_for_false)
            else:
                codes.append(code_for_other)
        df[f"correct_{lang}"] = codes
    return df

In [None]:
# Label every benchmark frame of every model; each list slot is overwritten
# with the frame returned by is_correct.
for model_idx, model_results in enumerate(BENCHMARK_RESULTS):
    for bench_idx, bench_df in enumerate(model_results):
        model_results[bench_idx] = is_correct(bench_df)

In [None]:
from gensim.models import KeyedVectors
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine where this file exists at exactly this location.
node_vectors_path = '/home/ps2575/proj/wikibench_v.1.1_data_backup/00.data/00.wikidata/02.wikidata_embedding/node_vectors.kv'
node_vectors = KeyedVectors.load(node_vectors_path)

# Keys of the embedding, their vectors stacked in key order, and a
# reverse lookup from key to row position in X.
nodes = list(node_vectors.index_to_key)
X = node_vectors[nodes]
node_to_index = dict(zip(nodes, range(len(nodes))))

In [None]:
def entities_set(BENCHMARK_RESULTS):
    """Collect every subject and object value found in the benchmark frames.

    Parameters
    ----------
    BENCHMARK_RESULTS : iterable of iterables of pandas.DataFrame
        Outer level indexed by model, inner level by benchmark. Each frame
        must contain a ``subject`` and an ``object`` column.

    Returns
    -------
    set of str
        All non-null subject/object values, converted to strings.

    Raises
    ------
    ValueError
        If a frame lacks the ``subject`` or the ``object`` column; the message
        names the offending model and benchmark positions.
    """
    wanted_columns = ("subject", "object")
    found = set()
    for model_idx, model_results in enumerate(BENCHMARK_RESULTS):
        for bench_idx, df in enumerate(model_results):
            if any(column not in df.columns for column in wanted_columns):
                raise ValueError(
                    f"Missing subject/object column at model {model_idx}, bench {bench_idx}"
                )
            for column in wanted_columns:
                # Missing values are skipped; everything else is compared as text.
                found.update(df[column].dropna().astype(str).tolist())
    return found
    
# Compare the entities used by the benchmarks with the keys of the embedding.
benchmarks_entities = entities_set(BENCHMARK_RESULTS)
embedding_nodes = set(node_vectors.index_to_key)
missing_entities = benchmarks_entities.difference(embedding_nodes)

# Printed counts: entities used in the benchmarks, entities available in the
# embedding, and benchmark entities that the embedding does not contain.
print(f"[Benchmark] 에 사용된 entities: {len(benchmarks_entities):,}")
print(f"[Embedding] 에 사용된 entities: {len(embedding_nodes):,}")
print(f"[Embedding] 에 누락된 entities: {len(missing_entities):,}")

In [None]:
sns.set(style="whitegrid")

# Take the five benchmark variants from the first entry of BENCHMARK_RESULTS:
# original, subject-shuffled, object-shuffled, property-scoped subject-shuffled
# and property-scoped object-shuffled (presumably the subject/object columns
# are the same for every model -- confirm).
df_org, df_ss, df_so, df_pss, df_pso = BENCHMARK_RESULTS[0][:5]

# The row-wise comparisons made later require every variant to list the same
# problems in the same order as the original benchmark.
for variant in (df_ss, df_so, df_pss, df_pso):
    assert (df_org["row_id"].values == variant["row_id"].values).all()

print("N problems:", len(df_org))
print("columns:", [c for c in ("row_id", "subject", "object") if c in df_org.columns])

In [None]:
def safe_similarity(kv, a, b):
    """Return ``kv.similarity(str(a), str(b))`` as a float, or NaN on any error.

    Parameters
    ----------
    kv : object
        Anything exposing ``similarity(key_a, key_b)``; in this notebook a
        gensim ``KeyedVectors`` instance.
    a, b : object
        Keys to compare; both are converted to ``str`` before the lookup.

    Returns
    -------
    float
        The similarity, or ``numpy.nan`` when the lookup or the conversion
        raises (every ``Exception`` is deliberately swallowed, so one failing
        pair does not abort a whole batch).
    """
    try:
        # The string conversion stays inside the try block so that a failing
        # conversion is also reported as NaN.
        left, right = str(a), str(b)
        score = kv.similarity(left, right)
        return float(score)
    except Exception:
        return np.nan


In [None]:
# x: similarity between each original subject and its subject-shuffled
#    replacement (ORG vs SS), row by row.
sub_x = np.fromiter(
    (
        safe_similarity(node_vectors, org_subject, shuffled_subject)
        for org_subject, shuffled_subject in zip(df_org["subject"], df_ss["subject"])
    ),
    dtype=float,
)

# y: same comparison against the property-scoped subject shuffle (ORG vs PSS).
sub_y = np.fromiter(
    (
        safe_similarity(node_vectors, org_subject, scoped_subject)
        for org_subject, scoped_subject in zip(df_org["subject"], df_pss["subject"])
    ),
    dtype=float,
)

df_sub_pair = pd.DataFrame(
    {
        "row_id": df_org["row_id"].values,
        "x_org_ss": sub_x,
        "y_org_pss": sub_y,
    }
)

# Rows where at least one similarity could not be computed.
print("Subject pair NaN rows:", df_sub_pair.isna().any(axis=1).sum())
df_sub_pair.head()


In [None]:
# x: similarity between each original object and its object-shuffled
#    replacement (ORG vs SO), row by row.
obj_x = np.fromiter(
    (
        safe_similarity(node_vectors, org_object, shuffled_object)
        for org_object, shuffled_object in zip(df_org["object"], df_so["object"])
    ),
    dtype=float,
)

# y: same comparison against the property-scoped object shuffle (ORG vs PSO).
obj_y = np.fromiter(
    (
        safe_similarity(node_vectors, org_object, scoped_object)
        for org_object, scoped_object in zip(df_org["object"], df_pso["object"])
    ),
    dtype=float,
)

df_obj_pair = pd.DataFrame(
    {
        "row_id": df_org["row_id"].values,
        "x_org_so": obj_x,
        "y_org_pso": obj_y,
    }
)

# Rows where at least one similarity could not be computed.
print("Object pair NaN rows:", df_obj_pair.isna().any(axis=1).sum())
df_obj_pair.head()


In [None]:
# NOTE(review): `plot_sub` was used here without being defined in any visible
# cell, so this cell raised NameError on a fresh kernel. It is rebuilt as the
# rows of df_sub_pair for which both similarities exist -- confirm that this
# matches the filtering that was originally intended.
plot_sub = df_sub_pair.dropna(subset=["x_org_ss", "y_org_pss"]).reset_index(drop=True)

# Value range of both subject similarities.
print(plot_sub[["x_org_ss","y_org_pss"]].min())
print(plot_sub[["x_org_ss","y_org_pss"]].max())


In [None]:
# NOTE(review): `plot_obj` was used here without being defined in any visible
# cell, so this cell raised NameError on a fresh kernel. It is rebuilt as the
# rows of df_obj_pair for which both similarities exist -- confirm that this
# matches the filtering that was originally intended.
plot_obj = df_obj_pair.dropna(subset=["x_org_so", "y_org_pso"]).reset_index(drop=True)

# Value range of both object similarities.
print("Object pair min:")
print(plot_obj[["x_org_so", "y_org_pso"]].min())

print("\nObject pair max:")
print(plot_obj[["x_org_so", "y_org_pso"]].max())


In [None]:
def joint_scatter_with_kde(
    fig, outer_spec, df, xcol, ycol,
    xlabel, ylabel, title,
    bw=0.05
):
    """
    Draw a scatter plot with marginal KDE curves inside ``outer_spec``.

    Layout: the scatter of ``df[xcol]`` against ``df[ycol]`` fills the large
    lower-left panel, the KDE of x is drawn above it and the KDE of y to its
    right. A dashed y = x reference line is added, and a text box reports how
    many points lie above, on and below that line.

    Parameters
    ----------
    fig : matplotlib Figure the three axes are added to.
    outer_spec : SubplotSpec that is subdivided into the 2x2 layout.
    df : DataFrame holding the two columns to compare.
    xcol, ycol : names of the columns plotted on the x and y axis.
    xlabel, ylabel, title : texts for the central scatter axes.
    bw : value passed to ``gaussian_kde`` as ``bw_method`` for both margins.

    Returns
    -------
    The central (scatter) axes.
    """
    layout = gridspec.GridSpecFromSubplotSpec(
        2, 2,
        subplot_spec=outer_spec,
        width_ratios=[4, 1.2],
        height_ratios=[1.2, 4],
        wspace=0.05,
        hspace=0.05,
    )

    # Axes are created in the order top, centre, right.
    ax_top, ax_joint, ax_right = (
        fig.add_subplot(layout[cell]) for cell in ((0, 0), (1, 0), (1, 1))
    )

    x, y = (df[column].to_numpy() for column in (xcol, ycol))

    # ---------- centre: scatter ----------
    ax_joint.scatter(x, y, s=14, alpha=0.35)

    # One common range for both axes so the diagonal really is y = x.
    mn = min(x.min(), y.min())
    mx = max(x.max(), y.max())
    ax_joint.plot([mn, mx], [mn, mx], "--", linewidth=1)  # y=x

    ax_joint.set(
        xlim=(mn, mx),
        ylim=(mn, mx),
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
    )

    # ---------- points above / on / below y = x ----------
    total = len(x)
    summary_lines = []
    for caption, mask in (("Y > X", y > x), ("Y = X", y == x), ("Y < X", y < x)):
        count = np.sum(mask)
        summary_lines.append(f"{caption} : {count} ({count/total:.1%})")
    txt = "\n".join(summary_lines)

    ax_joint.text(
        0.02, 0.98,
        txt,
        transform=ax_joint.transAxes,
        ha="left",
        va="top",
        fontsize=20,
        bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8),
    )

    # Both marginal curves are evaluated on the same 400-point grid.
    grid = np.linspace(mn, mx, 400)

    # ---------- top: KDE of x ----------
    density_x = gaussian_kde(x, bw_method=bw)(grid)
    ax_top.plot(grid, density_x, linewidth=1.5)
    ax_top.set_xlim(mn, mx)
    ax_top.set_xticks([])
    ax_top.set_yticks([])
    ax_top.set_ylabel("density")

    # ---------- right: KDE of y (drawn sideways) ----------
    density_y = gaussian_kde(y, bw_method=bw)(grid)
    ax_right.plot(density_y, grid, linewidth=1.5)
    ax_right.set_ylim(mn, mx)
    ax_right.set_xticks([])
    ax_right.set_yticks([])
    ax_right.set_xlabel("density")

    for marginal_ax in (ax_top, ax_right):
        sns.despine(ax=marginal_ax, left=True, bottom=True)

    return ax_joint


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Δ definitions.
# NOTE(review): the original cell read `plot_sub` / `plot_obj`, which no
# visible cell defines (NameError on a fresh kernel). The differences are now
# taken straight from the pair frames; rows where either similarity is missing
# yield NaN and are dropped -- confirm this matches the intended filtering.
delta_sub = (df_sub_pair["y_org_pss"] - df_sub_pair["x_org_ss"]).dropna().to_numpy()  # sim(ORG-PSS) - sim(ORG-SS)
delta_obj = (df_obj_pair["y_org_pso"] - df_obj_pair["x_org_so"]).dropna().to_numpy()  # sim(ORG-PSO) - sim(ORG-SO)

print("delta_sub min/max:", float(delta_sub.min()), float(delta_sub.max()))
print("delta_obj min/max:", float(delta_obj.min()), float(delta_obj.max()))


In [None]:
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
from scipy.stats import gaussian_kde
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(10, 13), dpi=300)

# Line colours of the two curves. The dictionary previously held different
# colours and was never used; it now holds the colours that are actually drawn.
colors = {
    "delta_sub": "#ff7f0e",  # orange (Subject: PSS − SS)
    "delta_obj": "#1f77b4",  # blue   (Object: PSO − SO)
}

# Only finite differences can enter the density estimate.
finite_sub = delta_sub[np.isfinite(delta_sub)]
finite_obj = delta_obj[np.isfinite(delta_obj)]

# Common evaluation grid: the joint data range, padded by 5 % on each side.
vals = np.concatenate([finite_sub, finite_obj])
mn, mx = float(vals.min()), float(vals.max())
pad = 0.05 * (mx - mn + 1e-12)
xs = np.linspace(mn - pad, mx + pad, 500)

bandwidth = 0.2
kde_s = gaussian_kde(finite_sub, bw_method=bandwidth)
kde_o = gaussian_kde(finite_obj, bw_method=bandwidth)

# Tick layout and tick label format.
ax.xaxis.set_major_locator(MultipleLocator(0.5))
ax.xaxis.set_minor_locator(MultipleLocator(0.1))
ax.yaxis.set_major_locator(MultipleLocator(1))
ax.yaxis.set_minor_locator(MultipleLocator(0.5))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.tick_params(axis='both', labelsize=50)

# NOTE(review): the object label mixes "POS" and "SO" while the frames are
# named PSO/SO -- confirm which abbreviation the figure should use.
ax.plot(xs, kde_s(xs), linewidth=5, label="Δ(PSS−SS)", alpha=0.9, color=colors["delta_sub"])
ax.plot(xs, kde_o(xs), linewidth=5, label="Δ(POS−SO)", alpha=0.9, color=colors["delta_obj"])
ax.axvline(0.0, linestyle="--", linewidth=4, color='gray', alpha=1)

# The blank title only reserves vertical space above the axes.
ax.set_title(" ", fontsize=30, pad=30)
ax.set_xlabel("Δ Cosine Similarity", fontsize=60, labelpad=40)
ax.set_ylabel("Probability Density", fontsize=60, labelpad=30)
ax.set_xlim(-0.9, 1)
ax.set_ylim(0, 4)
# A single legend call; the earlier duplicate was replaced by this one anyway.
ax.legend(fontsize=50, labelspacing=0.3, handletextpad=0.3, handlelength=1)

ax.grid(which='major', axis='both', linestyle='-', linewidth=2.4, alpha=0.6)
ax.grid(which='minor', axis='both', linestyle=':', linewidth=1.8, alpha=0.4)

plt.tight_layout()
plt.savefig('../02.results/01.Figures/Embedding_KDE_HardEasy.pdf', bbox_inches="tight")

plt.show()



In [None]:
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
from scipy.stats import gaussian_kde
import numpy as np
import matplotlib.pyplot as plt

def plot_kde(ax, values, label, bw=0.1, grid_n=500, lw=2.5, color=None, ls='-'):
    """Plot a Gaussian KDE curve of the finite entries of ``values`` on ``ax``.

    Parameters
    ----------
    ax : object with a matplotlib-style ``plot`` method.
    values : array-like of numbers; NaN and infinite entries are ignored.
    label : legend label of the curve.
    bw : value passed to ``gaussian_kde`` as ``bw_method``.
    grid_n : number of points at which the density is evaluated.
    lw, color, ls : line width, colour and line style of the curve.

    The curve is evaluated over the data range widened by 5 % on each side.
    Nothing is returned.
    """
    samples = np.asarray(values)
    finite_samples = samples[np.isfinite(samples)]
    density = gaussian_kde(finite_samples, bw_method=bw)

    low = float(finite_samples.min())
    high = float(finite_samples.max())
    # The tiny constant keeps the margin positive when all samples coincide.
    margin = 0.05 * (high - low + 1e-12)
    grid = np.linspace(low - margin, high + margin, grid_n)

    ax.plot(grid, density(grid), linewidth=lw, label=label, color=color, linestyle=ls)

fig, ax = plt.subplots(1, 1, figsize=(10, 13), dpi=300)

bandwidth = 0.2

colors = {
    "OS" : "#aec7e8",
    "POS": "#1f77b4",
    "SS" : "#ffbb78",
    "PSS": "#ff7f0e",
}

# NOTE(review): sim_sub_ss / sim_sub_pss / sim_obj_os / sim_obj_pso were used
# below without being defined in any visible cell (NameError on a fresh
# kernel). They are rebuilt from the pair frames, keeping only rows where both
# similarities of a pair exist -- confirm this matches the intended data.
sub_pairs = df_sub_pair.dropna(subset=["x_org_ss", "y_org_pss"])
obj_pairs = df_obj_pair.dropna(subset=["x_org_so", "y_org_pso"])
sim_sub_ss = sub_pairs["x_org_ss"].to_numpy()    # sim(ORG, SS)
sim_sub_pss = sub_pairs["y_org_pss"].to_numpy()  # sim(ORG, PSS)
sim_obj_os = obj_pairs["x_org_so"].to_numpy()    # sim(ORG, SO)
sim_obj_pso = obj_pairs["y_org_pso"].to_numpy()  # sim(ORG, PSO)

# Each plain/property-scoped pair shares one hue and differs only by line
# style (dashed vs solid); the lighter "SS"/"OS" colours above stay unused.
# NOTE(review): the legend labels mix "SO" and "POS" while the frames are
# named SO/PSO -- confirm which abbreviation the figure should use.
lw = 5
plot_kde(ax, sim_sub_ss, "SS",  bw=bandwidth, lw=lw, color=colors["PSS"],  ls="--")
plot_kde(ax, sim_sub_pss,"PSS", bw=bandwidth, lw=lw, color=colors["PSS"], ls="-")
plot_kde(ax, sim_obj_os, "SO",  bw=bandwidth, lw=lw, color=colors["POS"],  ls="--")
plot_kde(ax, sim_obj_pso,"POS", bw=bandwidth, lw=lw, color=colors["POS"], ls="-")

# The blank title only reserves vertical space above the axes.
ax.set_title(" ", fontsize=30, pad=30)
ax.set_xlabel("Cosine Similarity", fontsize=60, labelpad=30)
ax.set_ylabel("Probability Density", fontsize=60, labelpad=30)

ax.set_xlim(-0.2, 1.0)
ax.set_ylim(0.0, 4.0)

ax.xaxis.set_major_locator(MultipleLocator(0.5))
ax.xaxis.set_minor_locator(MultipleLocator(0.05))
ax.yaxis.set_major_locator(MultipleLocator(1.0))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.tick_params(axis='both', labelsize=50)
ax.axvline(0.0, linestyle="--", linewidth=3, color='gray', alpha=1)

ax.grid(which='major', axis='both', linestyle='-', linewidth=2.4, alpha=0.6)
ax.grid(which='minor', axis='both', linestyle=':', linewidth=1.8, alpha=0.4)

ax.legend(fontsize=50, labelspacing=0.3, handletextpad=0.3, handlelength=1)
plt.tight_layout()
plt.savefig('../02.results/01.Figures/Embedding_KDE.pdf', bbox_inches="tight")
plt.show()


###### 스킵