In [2]:
import re
import json
import pandas as pd
from pathlib import Path
from IPython.display import display


# ===== 表示設定 =====
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

# ===== 便利関数 =====
def infer_model_from_path(parts):
    """Guess the model name from the directory components of a result path.

    Rules are tried in a fixed priority order and the first hit wins:

    1. both ``meta-llama`` and ``Meta-Llama-3-8B`` are components;
    2. a ``gpt2*`` token appears anywhere in the joined path;
    3. one of the baseline names is an exact component.

    Args:
        parts: Sequence of path components (directories only).

    Returns:
        The inferred model name, or ``None`` when no rule matches.
    """
    joined = "/".join(parts)

    if all(token in parts for token in ("meta-llama", "Meta-Llama-3-8B")):
        return "meta-llama/Meta-Llama-3-8B"

    gpt2_hit = re.search(r"(gpt2[\w-]*)", joined)
    if gpt2_hit is not None:
        return gpt2_hit.group(1)

    # Baselines must match a whole component, not a substring of one.
    for baseline in ("random_emb", "fasttext", "gold_binary_balanced"):
        if baseline in parts:
            return baseline
    return None

def model_family(model):
    """Map a model name onto the coarse family label used to order table rows.

    Non-string values (for example ``None`` or NaN) are classified as
    ``"other"``. Rules are evaluated top to bottom; the first match wins.
    """
    if not isinstance(model, str):
        return "other"

    # Families whose label is identical to the name prefix.
    for prefix in ("random_emb", "fasttext", "gpt2"):
        if model.startswith(prefix):
            return prefix

    # The specific Llama-3-8B check must come before the generic meta-llama one.
    if "Meta-Llama-3-8B" in model:
        return "llama3-8b"
    if model.startswith("meta-llama"):
        return "llama"

    return "gold_binary_balanced" if model.startswith("gold_binary_balanced") else "other"

# Sort rank for each label returned by model_family(); rows with a smaller
# rank are listed first in the summary tables. "llama3-8b" and "llama" share
# rank 3, and "other" is placed last.
FAMILY_ORDER = {
    "random_emb": 0,
    "fasttext": 1,
    "gpt2": 2,
    "llama3-8b": 3,
    "llama": 3,
    "gold_binary_balanced": 4,
    "other": 9,
}

# Models for which only the layer-0 result file is loaded.
L0_MODELS = {"random_emb", "fasttext", "gold_binary_balanced"}

def should_include(path_obj: Path, model):
    """Decide whether a result file should be loaded for the given model.

    Rules:
    - random_emb / fasttext / gold_binary_balanced: only
      ``evaluation_results_L0.json``.
    - Llama-3-8B (by model name or by path): only ``evaluation_results_L*.json``
      whose path contains ``meta-llama``, ``Meta-Llama-3-8B``,
      ``template_entity_only`` and ``layer_all``.
    - gpt2*: any ``evaluation_results_L*.json``.
    - everything else: only ``evaluation_results.json``.
    """
    filename = path_obj.name
    components = path_obj.parts
    is_per_layer_file = re.fullmatch(r"evaluation_results_L\d+\.json", filename) is not None

    if model in L0_MODELS:
        return filename == "evaluation_results_L0.json"

    # Llama-3-8B is recognised either from the model name or from the path,
    # and is then accepted strictly from its designated directory.
    under_llama_dir = "meta-llama" in components and "Meta-Llama-3-8B" in components
    named_llama = bool(model) and "Meta-Llama-3-8B" in model
    if named_llama or under_llama_dir:
        required_dirs = ("template_entity_only", "layer_all")
        return (
            under_llama_dir
            and all(d in components for d in required_dirs)
            and is_per_layer_file
        )

    if model and model.startswith("gpt2"):
        return is_per_layer_file

    return filename == "evaluation_results.json"

def layer_from_filename(path: Path, existing):
    """Return the layer index, preferring an already-known value.

    When ``existing`` is ``None`` the index is parsed from a ``_L<digits>.json``
    suffix of the file name; ``None`` is returned if there is no such suffix.
    """
    if existing is None:
        suffix = re.search(r"_L(\d+)\.json$", path.name)
        return int(suffix.group(1)) if suffix else None
    return existing

def load_all_evaluation_results(base_path):
    """Collect the evaluation-result JSON files under ``base_path`` into a frame.

    Every ``evaluation_results.json`` and ``evaluation_results_L*.json`` below
    ``base_path`` is read; a file is kept only if ``should_include`` accepts it
    for its model. Missing fields are recovered from the path:

    - ``model``: JSON value, else ``infer_model_from_path`` on the directories;
    - ``template``: JSON value, else the first ``template_<name>`` directory;
    - ``layer``: JSON value, else the ``_L<n>`` suffix of the file name.

    All other JSON keys (gtd, gqd, cophenetic_corr, ...) are passed through
    unchanged.

    Args:
        base_path: Root directory to search (str or Path).

    Returns:
        A DataFrame with one row per accepted file, stably sorted by
        ``model`` then ``layer`` when those columns exist. Files that cannot
        be read or processed are reported on stdout and skipped.
    """
    base = Path(base_path)
    # rglob order depends on the filesystem. Sorting the candidates makes the
    # row order (and therefore every downstream tie-break) reproducible.
    candidates = sorted(
        list(base.rglob("evaluation_results.json"))
        + list(base.rglob("evaluation_results_L*.json"))
    )

    rows = []
    for jf in candidates:
        try:
            rel = jf.relative_to(base)
            parts = rel.parts[:-1]
            # JSON text is UTF-8; do not rely on the locale's default encoding.
            with open(jf, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value is not an object")

            model = data.get("model") or infer_model_from_path(parts)

            # Keep only files that match the per-model loading rules.
            if not should_include(rel, model):
                continue

            # template: recover from the path when absent from the JSON
            # (e.g. template_entity_only -> entity_only).
            if "template" not in data:
                for p in parts:
                    if p.startswith("template_"):
                        data["template"] = p.replace("template_", "", 1)
                        break

            # layer: recover from the file name when absent from the JSON.
            data["layer"] = layer_from_filename(rel, data.get("layer"))
            data["model"] = model  # overwrite so the inferred name is stored

            rows.append(data)

        except Exception as e:
            # Deliberately best-effort: one bad file must not abort the scan.
            print(f"Error loading {jf}: {e}")
            continue

    df = pd.DataFrame(rows)
    sort_keys = [c for c in ["model", "layer"] if c in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys, kind="mergesort")
    return df

def to_numeric_inplace(df, cols):
    """Coerce the listed columns of ``df`` to numeric dtype, in place.

    Columns absent from ``df`` are ignored; values that cannot be parsed
    become NaN. Nothing is returned.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")

def keep_single_random_emb(df, metric_cols_priority):
    """Collapse all ``random_emb*`` rows of ``df`` into a single row.

    The surviving row has the smallest value in the first column of
    ``metric_cols_priority`` that exists and has at least one non-null value
    among the random_emb rows. If no such column exists, the first random_emb
    row is kept.

    Returns:
        ``df`` itself when it has no random_emb rows; otherwise a new frame
        with a fresh 0..n-1 index, the other rows first and the kept row last.
    """
    is_random = df["model"].astype(str).str.startswith("random_emb", na=False)
    if not is_random.any():
        return df

    ranking_col = next(
        (
            col
            for col in metric_cols_priority
            if col in df.columns and df.loc[is_random, col].notna().any()
        ),
        None,
    )
    if ranking_col is None:
        chosen = df[is_random].index[0]
    else:
        chosen = df.loc[is_random, ranking_col].idxmin()

    survivor = df.loc[[chosen]]
    return pd.concat([df[~is_random], survivor], ignore_index=True)

def best_layer_by_rank_sum(df, selectors, minimize_cols, maximize_cols, fallback_cols):
    """Keep only the best row per model among the rows matched by ``selectors``.

    Within each model, every usable metric is ranked (``minimize_cols``
    ascending, ``maximize_cols`` descending) and the row with the smallest
    rank sum is kept. A metric is usable when its column exists and has at
    least one non-null value in that model's rows. If no metric is usable,
    the row with the smallest value in the first usable ``fallback_cols``
    column is kept, or the model's first row when none is usable either.

    Args:
        df: Frame with a ``model`` column and a unique index.
        selectors: Boolean Series aligned with ``df``; they are OR-ed together
            to mark the rows that should be reduced.
        minimize_cols: Metric columns where smaller is better.
        maximize_cols: Metric columns where larger is better.
        fallback_cols: Columns tried in order when no metric can be ranked.

    Returns:
        ``df`` itself when ``selectors`` is empty or matches nothing;
        otherwise a new frame with a fresh index, the unselected rows first
        followed by one row per selected model.
    """
    if not selectors:
        return df

    target_mask = selectors[0].copy()
    for m in selectors[1:]:
        target_mask |= m
    if not target_mask.any():
        return df

    # Rank on a numeric copy; the rows returned come from the untouched ``df``.
    sub = df.loc[target_mask].copy()
    for c in {*minimize_cols, *maximize_cols, *fallback_cols}:
        if c in sub.columns:
            sub[c] = pd.to_numeric(sub[c], errors="coerce")

    directions = [(c, True) for c in minimize_cols] + [(c, False) for c in maximize_cols]

    keep_indices = []
    for _, g in sub.groupby("model", dropna=False):
        # na_option="bottom" gives a missing value the worst rank. With the
        # default ("keep") its rank is NaN, which sum() skips, so a layer with
        # missing metrics would get a smaller total and wrongly win.
        ranks = [
            g[c].rank(method="min", ascending=ascending, na_option="bottom")
            for c, ascending in directions
            if c in g.columns and g[c].notna().any()
        ]

        if ranks:
            total_rank = pd.concat(ranks, axis=1).sum(axis=1)
            keep_idx = total_rank.idxmin()
        else:
            keep_idx = None
            for fc in fallback_cols:
                if fc in g.columns and g[fc].notna().any():
                    keep_idx = g[fc].idxmin()
                    break
            if keep_idx is None:
                keep_idx = g.index[0]

        keep_indices.append(keep_idx)

    return pd.concat([df[~target_mask], df.loc[keep_indices]], ignore_index=True)


In [3]:

# ===== User settings =====
BASE_PATH = "/home/masaki/hierarchical-repr/EntityTree/output/eval_tree"
TEMPLATE_FILTER = "entity_only"

# Metrics (each is used only when its column exists)
MINIMIZE_COLS = ["jrf_k1", "gtd", "gqd"]     # smaller is better
MAXIMIZE_COLS = ["cophenetic_corr"]          # larger is better
PRIMARY_FALLBACK = ["jrf_k2", "jrf_k1"]      # fallback priority order


# ===== Main processing =====
df_all = load_all_evaluation_results(BASE_PATH)
print(f"Total evaluations loaded (before filters): {len(df_all)}")
print("Columns available:", list(df_all.columns))

# Columns to display (full_path is not included)
display_cols = [
    "model", "layer", "template",
    "jrf_k1", "jrf_k2",
    "gtd", "gqd", "cophenetic_corr",
]
display_cols = [c for c in display_cols if c in df_all.columns]
df = df_all[display_cols].copy()

# Filter by template (this also sees template names recovered from the path)
if "template" in df.columns:
    df = df[df["template"] == TEMPLATE_FILTER].copy()

# Convert the metric columns to numeric
to_numeric_inplace(df, ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"])

# Exclusions
exclude_models = {"gold_binary", "gold_binary_left"}
df = df[~df["model"].isin(exclude_models)].copy()

# Collapse the random_emb rows into a single row.
# NOTE: "jrf_k1" is already in PRIMARY_FALLBACK, so the appended entry is redundant.
df = keep_single_random_emb(df, metric_cols_priority=PRIMARY_FALLBACK + ["jrf_k1"])

# Attach the model family and its sort rank (unknown families get rank 9)
df["family"] = df["model"].apply(model_family)
df["family_order"] = df["family"].map(FAMILY_ORDER).fillna(9).astype(int)

# ===== Keep only the best layer for the LMs (gpt2 / Llama-3-8B) =====
sel_gpt2 = df["model"].astype(str).str.startswith("gpt2", na=False)
sel_llama8b = df["model"].astype(str).str.contains("Meta-Llama-3-8B", na=False)
df = best_layer_by_rank_sum(
    df,
    selectors=[sel_gpt2, sel_llama8b],
    minimize_cols=MINIMIZE_COLS,
    maximize_cols=MAXIMIZE_COLS,
    fallback_cols=PRIMARY_FALLBACK
)

# Round to 2 decimal places (for display only; done after layer selection)
num_cols = [c for c in ["jrf_k1","jrf_k2","gtd","gqd","cophenetic_corr"] if c in df.columns]
df[num_cols] = df[num_cols].round(2)

# Rename columns (adding arrows that show the better direction)
rename_cols = {}
if "jrf_k1" in df.columns: rename_cols["jrf_k1"] = "jrf_k1(↓)"
if "jrf_k2" in df.columns: rename_cols["jrf_k2"] = "jrf_k2(↓)"
if "gtd" in df.columns: rename_cols["gtd"] = "Generalized Triplet Distance (↓)"
if "gqd" in df.columns: rename_cols["gqd"] = "Generalized Quartet Distance (↓)"
if "cophenetic_corr" in df.columns: rename_cols["cophenetic_corr"] = "cophenetic corr (↑)"
df = df.rename(columns=rename_cols)

# Replace model names with display names
df["model"] = df["model"].replace({
    "gold_binary_balanced": "Reference (Gold binary-balanced)",
    "random_emb": "random_emb (dim=4096)",
})

# Final sort and display (full_path is not included); the helper columns
# family_order / family are dropped after sorting.
sort_keys = ["family_order"]
if "layer" in df.columns:
    sort_keys += ["layer"]
sort_keys += ["model"]
df = df.sort_values(sort_keys, kind="mergesort").drop(columns=["family_order", "family"], errors="ignore")

display(df)


Total evaluations loaded (before filters): 116
Columns available: ['dataset', 'model', 'layer', 'template', 'n_entities', 'jrf_k1', 'jrf_k2', 'gold_internal_nodes', 'pred_internal_nodes', 'td_raw', 'td_norm', 'triplets_total', 'triplets_gold_resolved', 'triplets_pred_resolved', 'gtd', 'triplets_shared_resolved', 'qd_raw', 'qd_norm', 'gqd', 'quartets_total', 'quartets_gold_resolved', 'quartets_pred_resolved', 'caset_distance', 'caset_similarity', 'caset_pairs', 'cophenetic_corr', 'cophenetic_pairs', 'cophenetic_p']


Unnamed: 0,model,layer,template,jrf_k1(↓),jrf_k2(↓),Generalized Triplet Distance (↓),Generalized Quartet Distance (↓),cophenetic corr (↑)
2,random_emb (dim=4096),0,entity_only,91.14,93.3,0.67,0.68,-0.01
0,fasttext,0,entity_only,90.88,93.18,0.65,0.66,0.01
3,gpt2,8,entity_only,88.84,91.58,0.56,0.53,0.05
4,meta-llama/Meta-Llama-3-8B,10,entity_only,86.77,88.93,0.42,0.3,0.19
1,Reference (Gold binary-balanced),0,entity_only,82.0,82.0,0.0,0.0,0.79


In [None]:
# Popularity low: same summary table as above, built from the eval_tree_popLow results.

# ===== User settings =====
BASE_PATH = "/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popLow"
TEMPLATE_FILTER = "entity_only"

# Metrics (each is used only when its column exists)
MINIMIZE_COLS = ["jrf_k1", "gtd", "gqd"]     # smaller is better
MAXIMIZE_COLS = ["cophenetic_corr"]          # larger is better
PRIMARY_FALLBACK = ["jrf_k2", "jrf_k1"]      # fallback priority order


# ===== Main processing =====
df_all = load_all_evaluation_results(BASE_PATH)
print(f"Total evaluations loaded (before filters): {len(df_all)}")
print("Columns available:", list(df_all.columns))

# Columns to display (full_path is not included)
display_cols = [
    "model", "layer", "template",
    "jrf_k1", "jrf_k2",
    "gtd", "gqd", "cophenetic_corr",
]
display_cols = [c for c in display_cols if c in df_all.columns]
df = df_all[display_cols].copy()

# Filter by template (this also sees template names recovered from the path)
if "template" in df.columns:
    df = df[df["template"] == TEMPLATE_FILTER].copy()

# Convert the metric columns to numeric
to_numeric_inplace(df, ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"])

# Exclusions
exclude_models = {"gold_binary", "gold_binary_left"}
df = df[~df["model"].isin(exclude_models)].copy()

# Collapse the random_emb rows into a single row.
# NOTE: "jrf_k1" is already in PRIMARY_FALLBACK, so the appended entry is redundant.
df = keep_single_random_emb(df, metric_cols_priority=PRIMARY_FALLBACK + ["jrf_k1"])

# Attach the model family and its sort rank (unknown families get rank 9)
df["family"] = df["model"].apply(model_family)
df["family_order"] = df["family"].map(FAMILY_ORDER).fillna(9).astype(int)

# ===== Keep only the best layer for the LMs (gpt2 / Llama-3-8B) =====
sel_gpt2 = df["model"].astype(str).str.startswith("gpt2", na=False)
sel_llama8b = df["model"].astype(str).str.contains("Meta-Llama-3-8B", na=False)
df = best_layer_by_rank_sum(
    df,
    selectors=[sel_gpt2, sel_llama8b],
    minimize_cols=MINIMIZE_COLS,
    maximize_cols=MAXIMIZE_COLS,
    fallback_cols=PRIMARY_FALLBACK
)

# Round to 2 decimal places (for display only; done after layer selection)
num_cols = [c for c in ["jrf_k1","jrf_k2","gtd","gqd","cophenetic_corr"] if c in df.columns]
df[num_cols] = df[num_cols].round(2)

# Rename columns (adding arrows that show the better direction)
rename_cols = {}
if "jrf_k1" in df.columns: rename_cols["jrf_k1"] = "jrf_k1(↓)"
if "jrf_k2" in df.columns: rename_cols["jrf_k2"] = "jrf_k2(↓)"
if "gtd" in df.columns: rename_cols["gtd"] = "Generalized Triplet Distance (↓)"
if "gqd" in df.columns: rename_cols["gqd"] = "Generalized Quartet Distance (↓)"
if "cophenetic_corr" in df.columns: rename_cols["cophenetic_corr"] = "cophenetic corr (↑)"
df = df.rename(columns=rename_cols)

# Replace model names with display names
df["model"] = df["model"].replace({
    "gold_binary_balanced": "Reference (Gold binary-balanced)",
    "random_emb": "random_emb (dim=4096)",
})

# Final sort and display (full_path is not included); the helper columns
# family_order / family are dropped after sorting.
sort_keys = ["family_order"]
if "layer" in df.columns:
    sort_keys += ["layer"]
sort_keys += ["model"]
df = df.sort_values(sort_keys, kind="mergesort").drop(columns=["family_order", "family"], errors="ignore")

display(df)


Total evaluations loaded (before filters): 114
Columns available: ['dataset', 'model', 'layer', 'template', 'n_entities', 'jrf_k1', 'jrf_k2', 'td_raw', 'td_norm', 'triplets_total', 'triplets_gold_resolved', 'triplets_pred_resolved', 'gtd', 'triplets_shared_resolved', 'qd_raw', 'qd_norm', 'gqd', 'quartets_total', 'quartets_gold_resolved', 'quartets_pred_resolved', 'caset_distance', 'caset_similarity', 'caset_pairs', 'cophenetic_corr', 'cophenetic_pairs', 'cophenetic_p', 'gold_internal_nodes', 'pred_internal_nodes']


Unnamed: 0,model,layer,template,jrf_k1(↓),jrf_k2(↓),Generalized Triplet Distance (↓),Generalized Quartet Distance (↓),cophenetic corr (↑)
2,random_emb (dim=4096),0,entity_only,91.36,93.41,0.68,0.68,-0.02
0,fasttext,0,entity_only,90.75,92.98,0.66,0.67,-0.01
3,gpt2,10,entity_only,90.93,93.21,0.66,0.66,0.01
4,meta-llama/Meta-Llama-3-8B,15,entity_only,84.14,85.5,0.15,0.12,0.51
1,Reference (Gold binary-balanced),0,entity_only,82.0,82.0,0.0,0.0,0.79


In [5]:
# Popularity Middle: same summary table as above, built from the eval_tree_popMiddle results.

# ===== User settings =====
BASE_PATH = "/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popMiddle"
TEMPLATE_FILTER = "entity_only"

# Metrics (each is used only when its column exists)
MINIMIZE_COLS = ["jrf_k1", "gtd", "gqd"]     # smaller is better
MAXIMIZE_COLS = ["cophenetic_corr"]          # larger is better
PRIMARY_FALLBACK = ["jrf_k2", "jrf_k1"]      # fallback priority order


# ===== Main processing =====
df_all = load_all_evaluation_results(BASE_PATH)
print(f"Total evaluations loaded (before filters): {len(df_all)}")
print("Columns available:", list(df_all.columns))

# Columns to display (full_path is not included)
display_cols = [
    "model", "layer", "template",
    "jrf_k1", "jrf_k2",
    "gtd", "gqd", "cophenetic_corr",
]
display_cols = [c for c in display_cols if c in df_all.columns]
df = df_all[display_cols].copy()

# Filter by template (this also sees template names recovered from the path)
if "template" in df.columns:
    df = df[df["template"] == TEMPLATE_FILTER].copy()

# Convert the metric columns to numeric
to_numeric_inplace(df, ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"])

# Exclusions
exclude_models = {"gold_binary", "gold_binary_left"}
df = df[~df["model"].isin(exclude_models)].copy()

# Collapse the random_emb rows into a single row.
# NOTE: "jrf_k1" is already in PRIMARY_FALLBACK, so the appended entry is redundant.
df = keep_single_random_emb(df, metric_cols_priority=PRIMARY_FALLBACK + ["jrf_k1"])

# Attach the model family and its sort rank (unknown families get rank 9)
df["family"] = df["model"].apply(model_family)
df["family_order"] = df["family"].map(FAMILY_ORDER).fillna(9).astype(int)

# ===== Keep only the best layer for the LMs (gpt2 / Llama-3-8B) =====
sel_gpt2 = df["model"].astype(str).str.startswith("gpt2", na=False)
sel_llama8b = df["model"].astype(str).str.contains("Meta-Llama-3-8B", na=False)
df = best_layer_by_rank_sum(
    df,
    selectors=[sel_gpt2, sel_llama8b],
    minimize_cols=MINIMIZE_COLS,
    maximize_cols=MAXIMIZE_COLS,
    fallback_cols=PRIMARY_FALLBACK
)

# Round to 2 decimal places (for display only; done after layer selection)
num_cols = [c for c in ["jrf_k1","jrf_k2","gtd","gqd","cophenetic_corr"] if c in df.columns]
df[num_cols] = df[num_cols].round(2)

# Rename columns (adding arrows that show the better direction)
rename_cols = {}
if "jrf_k1" in df.columns: rename_cols["jrf_k1"] = "jrf_k1(↓)"
if "jrf_k2" in df.columns: rename_cols["jrf_k2"] = "jrf_k2(↓)"
if "gtd" in df.columns: rename_cols["gtd"] = "Generalized Triplet Distance (↓)"
if "gqd" in df.columns: rename_cols["gqd"] = "Generalized Quartet Distance (↓)"
if "cophenetic_corr" in df.columns: rename_cols["cophenetic_corr"] = "cophenetic corr (↑)"
df = df.rename(columns=rename_cols)

# Replace model names with display names
df["model"] = df["model"].replace({
    "gold_binary_balanced": "Reference (Gold binary-balanced)",
    "random_emb": "random_emb (dim=4096)",
})

# Final sort and display (full_path is not included); the helper columns
# family_order / family are dropped after sorting.
sort_keys = ["family_order"]
if "layer" in df.columns:
    sort_keys += ["layer"]
sort_keys += ["model"]
df = df.sort_values(sort_keys, kind="mergesort").drop(columns=["family_order", "family"], errors="ignore")

display(df)


Total evaluations loaded (before filters): 114
Columns available: ['dataset', 'model', 'layer', 'template', 'n_entities', 'jrf_k1', 'jrf_k2', 'td_raw', 'td_norm', 'triplets_total', 'triplets_gold_resolved', 'triplets_pred_resolved', 'gtd', 'triplets_shared_resolved', 'qd_raw', 'qd_norm', 'gqd', 'quartets_total', 'quartets_gold_resolved', 'quartets_pred_resolved', 'caset_distance', 'caset_similarity', 'caset_pairs', 'cophenetic_corr', 'cophenetic_pairs', 'cophenetic_p', 'gold_internal_nodes', 'pred_internal_nodes']


Unnamed: 0,model,layer,template,jrf_k1(↓),jrf_k2(↓),Generalized Triplet Distance (↓),Generalized Quartet Distance (↓),cophenetic corr (↑)
2,random_emb (dim=4096),0,entity_only,91.01,93.24,0.66,0.65,0.02
0,fasttext,0,entity_only,91.16,93.32,0.67,0.68,-0.0
3,gpt2,4,entity_only,90.91,93.18,0.66,0.66,0.01
4,meta-llama/Meta-Llama-3-8B,31,entity_only,84.79,86.26,0.21,0.2,0.54
1,Reference (Gold binary-balanced),0,entity_only,82.0,82.0,0.0,0.0,0.79


# layerごとの可視化

In [6]:
import json
import re
from pathlib import Path
import pandas as pd
import plotly.graph_objects as go

# ===== Color settings =====
# Trace color per raw model name, looked up by plot_metric via COLORS.get().
COLORS = {
    "gpt2": "#d62728",                                   # red
    "meta-llama/Meta-Llama-3-8B": "#1f77b4",             # blue
    "random_emb": "#555555",                              # dark gray
    "fasttext": "#ff7f0e",                                # orange
    "gold_binary_balanced": "#2ca02c",                    # green
}

# Line width of the dotted horizontal baseline traces.
BASELINE_LINE_WIDTH = 3.5

TEMPLATE_FILTER = "entity_only"  # only the entity_only template is targeted

# ===== ローダ =====
def load_llama3_8b(base: Path) -> pd.DataFrame:
    """Load the per-layer Llama-3-8B results for the entity_only template.

    Reads every ``evaluation_results_L*.json`` in
    ``base/meta-llama/Meta-Llama-3-8B/template_entity_only/layer_all``.

    Args:
        base: Root directory of the evaluation output.

    Returns:
        One row per readable file with the columns model, layer, jrf_k1,
        jrf_k2, gtd, gqd, cophenetic_corr and template. The frame is empty
        (no columns) when the directory is missing or holds no readable file.
        ``layer`` comes from the ``_L<n>`` file-name suffix, falling back to
        the JSON ``layer`` field.
    """
    root = base / "meta-llama" / "Meta-Llama-3-8B" / "template_entity_only" / "layer_all"
    rows = []
    if not root.exists():
        return pd.DataFrame(rows)

    for jf in sorted(root.glob("evaluation_results_L*.json")):
        # Skip unreadable files instead of aborting, as the other loaders in
        # this file do; report them so that missing layers are noticed.
        try:
            # JSON text is UTF-8; do not rely on the locale's default encoding.
            with open(jf, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as e:
            print(f"Error loading {jf}: {e}")
            continue

        m = re.search(r"_L(\d+)\.json$", jf.name)
        layer = int(m.group(1)) if m else data.get("layer")
        rows.append({
            "model": "meta-llama/Meta-Llama-3-8B",
            "layer": layer,
            "jrf_k1": data.get("jrf_k1"),
            "jrf_k2": data.get("jrf_k2"),
            "gtd": data.get("gtd"),
            "gqd": data.get("gqd"),
            "cophenetic_corr": data.get("cophenetic_corr"),
            "template": data.get("template", "entity_only"),
        })
    return pd.DataFrame(rows)

def pick_best_gpt2_model(dfs_by_model: dict[str, pd.DataFrame]) -> str | None:
    if not dfs_by_model:
        return None
    if "gpt2" in dfs_by_model:
        return "gpt2"
    return max(dfs_by_model.keys(), key=lambda k: dfs_by_model[k]["layer"].max())

def load_gpt2(base: Path) -> pd.DataFrame:
    """Load the per-layer results of one gpt2 variant for the entity_only template.

    Every ``evaluation_results_L*.json`` below ``base`` whose relative path has
    a component starting with ``gpt2`` is considered. A file is kept when its
    path contains ``template_entity_only`` or, failing that, when its JSON
    ``template`` field equals ``"entity_only"``. Rows are grouped by model
    name (JSON ``model``, else the first ``gpt2*`` path component) and the
    group chosen by ``pick_best_gpt2_model`` is returned.

    Args:
        base: Root directory of the evaluation output.

    Returns:
        A frame with the columns model, layer, jrf_k1, jrf_k2, gtd, gqd,
        cophenetic_corr and template, or an empty frame when nothing matches.
    """
    rows_by_model = {}
    for jf in base.rglob("evaluation_results_L*.json"):
        rel_parts = jf.relative_to(base).parts
        if not any(p.startswith("gpt2") for p in rel_parts):
            continue

        # Parse each file once and treat read errors the same way on every
        # path: report the file and skip it.
        try:
            # JSON text is UTF-8; do not rely on the locale's default encoding.
            with open(jf, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as e:
            print(f"Error loading {jf}: {e}")
            continue

        # Restrict to entity_only: trust the path first, then the JSON field.
        if "template_entity_only" not in rel_parts and data.get("template") != "entity_only":
            continue

        model = data.get("model") or next((p for p in rel_parts if p.startswith("gpt2")), "gpt2")
        m = re.search(r"_L(\d+)\.json$", jf.name)
        layer = int(m.group(1)) if m else data.get("layer")

        rows_by_model.setdefault(model, []).append({
            "model": model,
            "layer": layer,
            "jrf_k1": data.get("jrf_k1"),
            "jrf_k2": data.get("jrf_k2"),
            "gtd": data.get("gtd"),
            "gqd": data.get("gqd"),
            "cophenetic_corr": data.get("cophenetic_corr"),
            "template": data.get("template", "entity_only"),
        })

    dfs_by_model = {name: pd.DataFrame(rows) for name, rows in rows_by_model.items()}
    best = pick_best_gpt2_model(dfs_by_model)
    return dfs_by_model.get(best, pd.DataFrame())

def infer_model_from_parts(parts: tuple[str, ...]) -> str | None:
    p = "/".join(parts)
    if "random_emb" in p:
        return "random_emb"
    if "fasttext" in p:
        return "fasttext"
    if "gold_binary_balanced" in p:
        return "gold_binary_balanced"
    return None

def load_singleton_models(base: Path) -> pd.DataFrame:
    """Load the single-layer baselines used as horizontal reference lines.

    Only ``evaluation_results_L0.json`` files whose directory path names
    random_emb, fasttext or gold_binary_balanced are read, and only rows whose
    template (JSON ``template``, else the first ``template_<name>`` directory)
    is ``entity_only`` are kept.

    Args:
        base: Root directory of the evaluation output.

    Returns:
        A frame with the columns model, layer (always 0), jrf_k1, jrf_k2, gtd,
        gqd, cophenetic_corr and template; empty when nothing matches.
        Unreadable files are reported on stdout and skipped.
    """
    rows = []
    for jf in base.rglob("evaluation_results_L0.json"):
        rel = jf.relative_to(base)
        parts = rel.parts[:-1]
        model_guess = infer_model_from_parts(parts)
        if model_guess is None:
            continue
        try:
            # JSON text is UTF-8; do not rely on the locale's default encoding.
            with open(jf, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as e:
            # Best-effort load, but say which file was dropped.
            print(f"Error loading {jf}: {e}")
            continue
        template = data.get("template") or next((p.replace("template_", "", 1) for p in parts if p.startswith("template_")), None)
        if template != "entity_only":
            continue
        rows.append({
            # ``or`` (not a .get default) so that a null or empty JSON "model"
            # also falls back to the path-derived name, as in the other loaders.
            "model": data.get("model") or model_guess,
            "layer": 0,
            "jrf_k1": data.get("jrf_k1"),
            "jrf_k2": data.get("jrf_k2"),
            "gtd": data.get("gtd"),
            "gqd": data.get("gqd"),
            "cophenetic_corr": data.get("cophenetic_corr"),
            "template": "entity_only",
        })
    return pd.DataFrame(rows)


# 表示名
def to_disp(name: str) -> str:
    """Return the legend/display label for a raw model name.

    Names without a dedicated label are returned unchanged.
    """
    labels = (
        ("meta-llama/Meta-Llama-3-8B", "Llama-3-8B"),
        ("gpt2", "gpt2"),
        ("random_emb", "random_emb (dim=4096)"),
        ("gold_binary_balanced", "Reference (Gold binary-balanced)"),
    )
    for raw, shown in labels:
        if name == raw:
            return shown
    return name


def get_singleton_value(df_singleton: pd.DataFrame, metric_key: str, raw_name: str):
    """Return the mean of ``metric_key`` over the rows whose model is ``raw_name``.

    Returns ``None`` when the frame is empty, the metric column is missing, or
    no row has that model; otherwise a Python float.
    """
    has_metric = (not df_singleton.empty) and metric_key in df_singleton.columns
    if not has_metric:
        return None

    matches = df_singleton["model"] == raw_name
    picked = df_singleton.loc[matches, metric_key]
    return None if picked.empty else float(picked.astype(float).mean())

def plot_metric(df_metric: pd.DataFrame, metric_key: str, df_singleton: pd.DataFrame):
    """Build a plotly figure of one metric against the normalized layer.

    Each model in ``df_metric`` is drawn as a line with markers, and each
    baseline (random_emb, fasttext, gold_binary_balanced) that has a value in
    ``df_singleton`` is drawn as a dotted horizontal line from x=0 to x=1.
    The label and arrow come from the module-level ``METRIC_INFO``, which is
    looked up when the function is called.

    Args:
        df_metric: Frame with the columns model, layer, normalized_layer,
            model_disp and ``metric_key``.
        metric_key: Metric column to plot; must be a key of ``METRIC_INFO``.
        df_singleton: Frame of baseline rows, read via get_singleton_value.

    Returns:
        The figure, or ``None`` (after printing a warning) when no row has
        both the metric and the normalized layer.
    """
    label, arrow = METRIC_INFO[metric_key]
    usable = df_metric.dropna(subset=[metric_key, "normalized_layer"])
    if usable.empty:
        print(f"[warn] {metric_key} のデータが空です。スキップします。")
        return None

    fig = go.Figure()

    # Main curves: gpt2 / Llama-3-8B.
    by_model = usable.sort_values(["model", "layer"]).groupby("model")
    for model_name, curve in by_model:
        shown_as = curve.iloc[0]["model_disp"]
        curve_color = COLORS.get(model_name)
        hover = "Layer(norm)=%{x:.2f}<br>" + f"{label}=%{{y:.2f}}<extra>{shown_as}</extra>"
        trace = go.Scatter(
            x=curve["normalized_layer"],
            y=curve[metric_key],
            mode="lines+markers",
            name=shown_as,
            line=dict(color=curve_color),
            marker=dict(color=curve_color),
            hovertemplate=hover,
        )
        fig.add_trace(trace)

    # Dotted horizontal lines: random_emb / fasttext / Reference (Gold binary-balanced).
    for raw_name in ("random_emb", "fasttext", "gold_binary_balanced"):
        level = get_singleton_value(df_singleton, metric_key, raw_name)
        if level is not None:
            line_color = COLORS.get(raw_name)
            shown_as = to_disp(raw_name)
            baseline = go.Scatter(
                x=[0.0, 1.0],
                y=[level, level],
                mode="lines",
                name=shown_as,
                # The explicit width keeps the dotted baselines easy to see.
                line=dict(dash="dot", color=line_color, width=BASELINE_LINE_WIDTH),
                hovertemplate=f"Layer(norm)=%{{x:.2f}}<br>{label}=%{{y:.2f}}<extra>{shown_as}</extra>",
            )
            fig.add_trace(baseline)

    fig.update_layout(
        title=f"{label} across normalized layers ({arrow})",
        xaxis_title="Normalized layer (0 → 1)",
        yaxis_title=label,
        legend_title="Model / Dotted refs",
    )
    fig.update_xaxes(range=[0, 1])
    return fig



In [7]:
# ===== Data preparation =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree")
df_llama = load_llama3_8b(BASE_PATH)
df_gpt2  = load_gpt2(BASE_PATH)
df_single = load_singleton_models(BASE_PATH)   # random_emb / fasttext / gold_binary_balanced (L0 only)

# The two main models (empty frames are left out)
frames = [df for df in [df_gpt2, df_llama] if not df.empty]
if not frames:
    raise RuntimeError("gpt2 / Llama-3-8B のデータが見つかりません。パスやファイルを確認してください。")
df = pd.concat(frames, ignore_index=True)

# Convert to numeric
for c in ["layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")
if not df_single.empty:
    for c in ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
        if c in df_single.columns:
            df_single[c] = pd.to_numeric(df_single[c], errors="coerce")

# Keep entity_only rows only
df = df[df["template"] == "entity_only"].copy()
if not df_single.empty:
    df_single = df_single[df_single["template"] == "entity_only"].copy()

# Normalized layer: layer index divided by the model's largest layer index
# (layer 0 -> 0.0, last layer -> 1.0); 0 when the largest index is not positive.
df["normalized_layer"] = df.groupby("model")["layer"].transform(
    lambda s: s / s.max() if s.max() and s.max() > 0 else 0
)


# Display names used in the legends
df["model_disp"] = df["model"].map(to_disp)
if not df_single.empty:
    df_single["model_disp"] = df_single["model"].map(to_disp)

# ===== Figure utilities =====
# metric column -> (axis/title label, arrow showing the better direction);
# plot_metric reads this global when it is called.
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}


# ===== Plot each of the 4 metrics in its own figure =====
fig_jrfk1 = plot_metric(df, "jrf_k1", df_single)
fig_gtd   = plot_metric(df, "gtd", df_single)
fig_gqd   = plot_metric(df, "gqd", df_single)
fig_coph  = plot_metric(df, "cophenetic_corr", df_single)

# plot_metric returns None for a metric without data; skip those.
for f in [fig_jrfk1, fig_gtd, fig_gqd, fig_coph]:
    if f is not None:
        f.show()


In [8]:
# ===== Data preparation (eval_tree_popLow results) =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popLow")
df_llama = load_llama3_8b(BASE_PATH)
df_gpt2  = load_gpt2(BASE_PATH)
df_single = load_singleton_models(BASE_PATH)   # random_emb / fasttext / gold_binary_balanced (L0 only)

# The two main models (empty frames are left out)
frames = [df for df in [df_gpt2, df_llama] if not df.empty]
if not frames:
    raise RuntimeError("gpt2 / Llama-3-8B のデータが見つかりません。パスやファイルを確認してください。")
df = pd.concat(frames, ignore_index=True)

# Convert to numeric
for c in ["layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")
if not df_single.empty:
    for c in ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
        if c in df_single.columns:
            df_single[c] = pd.to_numeric(df_single[c], errors="coerce")

# Keep entity_only rows only
df = df[df["template"] == "entity_only"].copy()
if not df_single.empty:
    df_single = df_single[df_single["template"] == "entity_only"].copy()

# Normalized layer: layer index divided by the model's largest layer index
# (layer 0 -> 0.0, last layer -> 1.0); 0 when the largest index is not positive.
df["normalized_layer"] = df.groupby("model")["layer"].transform(
    lambda s: s / s.max() if s.max() and s.max() > 0 else 0
)


# Display names used in the legends
df["model_disp"] = df["model"].map(to_disp)
if not df_single.empty:
    df_single["model_disp"] = df_single["model"].map(to_disp)

# ===== Figure utilities =====
# metric column -> (axis/title label, arrow showing the better direction);
# plot_metric reads this global when it is called.
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}


# ===== Plot each of the 4 metrics in its own figure =====
fig_jrfk1 = plot_metric(df, "jrf_k1", df_single)
fig_gtd   = plot_metric(df, "gtd", df_single)
fig_gqd   = plot_metric(df, "gqd", df_single)
fig_coph  = plot_metric(df, "cophenetic_corr", df_single)

# plot_metric returns None for a metric without data; skip those.
for f in [fig_jrfk1, fig_gtd, fig_gqd, fig_coph]:
    if f is not None:
        f.show()


In [9]:
# ===== Data preparation (eval_tree_popMiddle results) =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popMiddle")
df_llama = load_llama3_8b(BASE_PATH)
df_gpt2  = load_gpt2(BASE_PATH)
df_single = load_singleton_models(BASE_PATH)   # random_emb / fasttext / gold_binary_balanced (L0 only)

# The two main models (empty frames are left out)
frames = [df for df in [df_gpt2, df_llama] if not df.empty]
if not frames:
    raise RuntimeError("gpt2 / Llama-3-8B のデータが見つかりません。パスやファイルを確認してください。")
df = pd.concat(frames, ignore_index=True)

# Convert to numeric
for c in ["layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")
if not df_single.empty:
    for c in ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]:
        if c in df_single.columns:
            df_single[c] = pd.to_numeric(df_single[c], errors="coerce")

# Keep entity_only rows only
df = df[df["template"] == "entity_only"].copy()
if not df_single.empty:
    df_single = df_single[df_single["template"] == "entity_only"].copy()

# Normalized layer: layer index divided by the model's largest layer index
# (layer 0 -> 0.0, last layer -> 1.0); 0 when the largest index is not positive.
df["normalized_layer"] = df.groupby("model")["layer"].transform(
    lambda s: s / s.max() if s.max() and s.max() > 0 else 0
)


# Display names used in the legends
df["model_disp"] = df["model"].map(to_disp)
if not df_single.empty:
    df_single["model_disp"] = df_single["model"].map(to_disp)

# ===== Figure utilities =====
# metric column -> (axis/title label, arrow showing the better direction);
# plot_metric reads this global when it is called.
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}


# ===== Plot each of the 4 metrics in its own figure =====
fig_jrfk1 = plot_metric(df, "jrf_k1", df_single)
fig_gtd   = plot_metric(df, "gtd", df_single)
fig_gqd   = plot_metric(df, "gqd", df_single)
fig_coph  = plot_metric(df, "cophenetic_corr", df_single)

# plot_metric returns None for a metric without data; skip those.
for f in [fig_jrfk1, fig_gtd, fig_gqd, fig_coph]:
    if f is not None:
        f.show()


# Contextの違い

In [10]:
# ==== Llama-3-8B only: comparison across templates (hard-coded version) ====

# Template names to compare; each maps to a "template_<name>" directory, and
# this order is also the row order of the resulting table.
TEMPLATE_LIST = [
    "entity_only",
    "gift",
    "occupation_question",
    "occupation_simple",
    "profession_query",
    "professional_intro",
]

def load_llama8b_template_metrics_fixed(base_path: str, layer: int, include_src_path: bool=False) -> pd.DataFrame:
    """Build a per-template metrics table for Llama-3-8B at one fixed layer.

    For every template in ``TEMPLATE_LIST`` the file
    ``<base_path>/meta-llama/Meta-Llama-3-8B/template_<tmpl>/layer_all/evaluation_results_L<layer>.json``
    is read when it exists. Missing files are skipped silently; unreadable
    ones are reported on stdout and skipped.

    Args:
        base_path: Root directory of the evaluation output.
        layer: Layer number whose result files are loaded.
        include_src_path: When true, add a ``src_path`` column holding the
            path of each source file.

    Returns:
        A frame ordered like ``TEMPLATE_LIST`` with display-ready column names
        and metrics rounded to 2 decimals, or an empty frame (after printing a
        notice) when no file was loaded.
    """
    model_dir = Path(base_path) / "meta-llama" / "Meta-Llama-3-8B"
    metric_keys = ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"]

    records = []
    for tmpl in TEMPLATE_LIST:
        jf = model_dir / f"template_{tmpl}" / "layer_all" / f"evaluation_results_L{layer}.json"
        if not jf.exists():
            # A missing template/layer combination is simply left out.
            continue
        try:
            with open(jf, "r") as f:
                data = json.load(f)
        except Exception as e:
            print(f"Error loading {jf}: {e}")
            continue

        # The template name is taken from the path; the JSON "template" field
        # is not trusted.
        record = {
            "model": "meta-llama/Meta-Llama-3-8B",
            "layer": int(layer),
            "template": tmpl,
        }
        record.update((key, data.get(key)) for key in metric_keys)
        if include_src_path:
            record["src_path"] = str(jf)
        records.append(record)

    table = pd.DataFrame(records)
    if table.empty:
        print("No rows matched. ファイルが存在するか / layer 番号を確認してください。")
        return table

    # Convert the metrics to numeric.
    to_numeric_inplace(table, ["jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr"])

    # Rows follow the order of TEMPLATE_LIST.
    table["template"] = pd.Categorical(table["template"], categories=TEMPLATE_LIST, ordered=True)
    table = table.sort_values(["template"], kind="mergesort")

    # Display names with arrows that show the better direction.
    pretty = {
        "jrf_k1": "jrf_k1(↓)",
        "jrf_k2": "jrf_k2(↓)",
        "gtd": "Generalized Triplet Distance (↓)",
        "gqd": "Generalized Quartet Distance (↓)",
        "cophenetic_corr": "cophenetic corr (↑)",
    }
    table = table.rename(columns=pretty)

    # Round the metrics to 2 decimal places for display.
    shown_metrics = [name for name in pretty.values() if name in table.columns]
    table[shown_metrics] = table[shown_metrics].round(2)

    display_cols = ["model", "layer", "template", *pretty.values()]
    if include_src_path:
        display_cols.append("src_path")
    return table[display_cols]


In [11]:
# Compare the templates for Llama-3-8B at one fixed layer and show the table
# together with the source path of each row.
BASE_PATH = "/home/masaki/hierarchical-repr/EntityTree/output/eval_tree"
layer = 10

df_templates = load_llama8b_template_metrics_fixed(BASE_PATH, layer=layer, include_src_path=True)
display(df_templates)


Unnamed: 0,model,layer,template,jrf_k1(↓),jrf_k2(↓),Generalized Triplet Distance (↓),Generalized Quartet Distance (↓),cophenetic corr (↑),src_path
0,meta-llama/Meta-Llama-3-8B,10,entity_only,86.77,88.93,0.42,0.3,0.19,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_entity_only/layer_all/evaluation_results_L10.json
1,meta-llama/Meta-Llama-3-8B,10,gift,88.24,90.8,0.5,0.44,0.14,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_gift/layer_all/evaluation_results_L10.json
2,meta-llama/Meta-Llama-3-8B,10,occupation_question,88.48,91.16,0.47,0.42,0.13,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_occupation_question/layer_all/evaluation_results_L10.json
3,meta-llama/Meta-Llama-3-8B,10,occupation_simple,88.56,91.27,0.52,0.45,0.11,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_occupation_simple/layer_all/evaluation_results_L10.json
4,meta-llama/Meta-Llama-3-8B,10,profession_query,88.17,90.95,0.48,0.43,0.12,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_profession_query/layer_all/evaluation_results_L10.json
5,meta-llama/Meta-Llama-3-8B,10,professional_intro,88.58,91.36,0.5,0.45,0.11,/home/masaki/hierarchical-repr/EntityTree/output/eval_tree/meta-llama/Meta-Llama-3-8B/template_professional_intro/layer_all/evaluation_results_L10.json


In [12]:
import json
import re
from pathlib import Path
import pandas as pd
import plotly.graph_objects as go

# ===== Target templates (legend entries are drawn in this order) =====
# Each name maps to a `template_<name>` directory read by load_llama3_8b_templates.
TEMPLATE_LIST = [
    "entity_only",
    "gift",
    "occupation_question",
    "occupation_simple",
    "profession_query",
    "professional_intro",
]

# ===== Load Llama-3-8B results, one template at a time =====
def load_llama3_8b_templates(base: Path, templates: list[str]) -> pd.DataFrame:
    """Collect per-layer evaluation metrics for Meta-Llama-3-8B, grouped by template.

    For every template name, each file matching
    ``<base>/meta-llama/Meta-Llama-3-8B/template_<name>/layer_all/evaluation_results_L*.json``
    is read and turned into one row.

    Args:
        base: Root directory of the evaluation outputs. A plain string path is
            accepted as well as a ``Path``.
        templates: Template names without the ``template_`` prefix. Templates
            whose directory does not exist are skipped.

    Returns:
        DataFrame with the columns ``model``, ``template``, ``layer``,
        ``jrf_k1``, ``jrf_k2``, ``gtd``, ``gqd`` and ``cophenetic_corr``.
        Metrics missing from a file are left empty. The frame has zero rows
        (``.empty`` is true) when no readable file was found.
    """
    columns = [
        "model", "template", "layer",
        "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr",
    ]
    metric_keys = columns[3:]
    layer_pattern = re.compile(r"_L(\d+)\.json$")

    base = Path(base)  # tolerate str paths, which cannot be joined with "/"
    rows = []
    for tmpl in templates:
        root = base / "meta-llama" / "Meta-Llama-3-8B" / f"template_{tmpl}" / "layer_all"
        if not root.is_dir():
            continue
        for jf in sorted(root.glob("evaluation_results_L*.json")):
            try:
                with open(jf, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError) as exc:
                # Unreadable or malformed file: keep going (best effort), but say so.
                # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
                print(f"[warn] skipping {jf}: {exc}")
                continue
            if not isinstance(data, dict):
                # Only a JSON object can supply the metric keys read below.
                print(f"[warn] skipping {jf}: top-level JSON value is not an object")
                continue

            # The layer comes from the file name; fall back to the JSON field.
            m = layer_pattern.search(jf.name)
            layer = int(m.group(1)) if m else data.get("layer")

            rows.append({
                "model": "meta-llama/Meta-Llama-3-8B",
                "template": tmpl,  # taken from the path, not from the JSON content
                "layer": layer,
                **{key: data.get(key) for key in metric_keys},
            })
    # Passing the columns explicitly keeps the schema even when there are no rows.
    return pd.DataFrame(rows, columns=columns)

def plot_metric_by_templates(df_metric: pd.DataFrame, metric_key: str):
    """Draw one line per template for a single metric against the normalized layer.

    Args:
        df_metric: Frame holding the ``template``, ``layer`` and
            ``normalized_layer`` columns plus the ``metric_key`` column.
        metric_key: Metric column to plot; must be a key of ``METRIC_INFO``.

    Returns:
        The plotly figure, or ``None`` (after printing a warning) when no row
        has both the metric and ``normalized_layer`` present.
    """
    label, arrow = METRIC_INFO[metric_key]

    usable = df_metric.dropna(subset=[metric_key, "normalized_layer"])
    if usable.empty:
        print(f"[warn] {metric_key} のデータが空です。スキップします。")
        return None

    fig = go.Figure()

    # One trace per template, following TEMPLATE_LIST order; absent templates are skipped.
    for tmpl in TEMPLATE_LIST:
        rows = usable.loc[usable["template"] == tmpl].sort_values("layer")
        if not rows.empty:
            hover = "Layer(norm)=%{x:.2f}<br>" + f"{label}=%{{y:.2f}}<extra>{tmpl}</extra>"
            trace = go.Scatter(
                x=rows["normalized_layer"],
                y=rows[metric_key],
                mode="lines+markers",
                name=tmpl,
                hovertemplate=hover,
            )
            fig.add_trace(trace)

    layout_options = {
        "title": f"{MODEL_TITLE}: {label} across normalized layers ({arrow})",
        "xaxis_title": "Normalized layer (0 → 1)",
        "yaxis_title": label,
        "legend_title": "Template",
    }
    fig.update_layout(**layout_options)
    # x range with a small margin beyond [0, 1].
    fig.update_xaxes(range=[-0.01, 1.01])
    return fig


In [13]:

# ===== Paths =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree")

# ===== Data preparation =====
df = load_llama3_8b_templates(BASE_PATH, TEMPLATE_LIST)
if df.empty:
    raise RuntimeError("Llama-3-8B の評価ファイルが見つかりません。パスとテンプレートの存在を確認してください。")

# Coerce the layer and metric columns to numbers; unparseable values become NaN.
numeric_columns = ("layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr")
for column in numeric_columns:
    if column not in df.columns:
        continue
    df[column] = pd.to_numeric(df[column], errors="coerce")

# Normalized layer: a single model, so dividing by the overall maximum is enough.
max_layer = df["layer"].max()
if pd.notna(max_layer) and max_layer > 0:
    df["normalized_layer"] = df["layer"] / max_layer
else:
    df["normalized_layer"] = 0.0

# ===== Figure settings read by plot_metric_by_templates =====
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}
MODEL_TITLE = "Llama-3-8B"

# ===== One figure per metric =====
fig_jrfk1 = plot_metric_by_templates(df_metric=df, metric_key="jrf_k1")
fig_gtd = plot_metric_by_templates(df_metric=df, metric_key="gtd")
fig_gqd = plot_metric_by_templates(df_metric=df, metric_key="gqd")
fig_coph = plot_metric_by_templates(df_metric=df, metric_key="cophenetic_corr")

# Show whatever was produced; a metric without data yields None and is skipped.
for fig in (fig_jrfk1, fig_gtd, fig_gqd, fig_coph):
    if fig is None:
        continue
    fig.show()


In [14]:

# ===== Paths =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popLow")

# ===== Data preparation =====
df = load_llama3_8b_templates(BASE_PATH, TEMPLATE_LIST)
if df.empty:
    raise RuntimeError("Llama-3-8B の評価ファイルが見つかりません。パスとテンプレートの存在を確認してください。")

# Coerce the layer and metric columns to numbers; unparseable values become NaN.
numeric_columns = ("layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr")
for column in numeric_columns:
    if column not in df.columns:
        continue
    df[column] = pd.to_numeric(df[column], errors="coerce")

# Normalized layer: a single model, so dividing by the overall maximum is enough.
max_layer = df["layer"].max()
if pd.notna(max_layer) and max_layer > 0:
    df["normalized_layer"] = df["layer"] / max_layer
else:
    df["normalized_layer"] = 0.0

# ===== Figure settings read by plot_metric_by_templates =====
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}
MODEL_TITLE = "Llama-3-8B"

# ===== One figure per metric =====
fig_jrfk1 = plot_metric_by_templates(df_metric=df, metric_key="jrf_k1")
fig_gtd = plot_metric_by_templates(df_metric=df, metric_key="gtd")
fig_gqd = plot_metric_by_templates(df_metric=df, metric_key="gqd")
fig_coph = plot_metric_by_templates(df_metric=df, metric_key="cophenetic_corr")

# Show whatever was produced; a metric without data yields None and is skipped.
for fig in (fig_jrfk1, fig_gtd, fig_gqd, fig_coph):
    if fig is None:
        continue
    fig.show()


In [15]:

# ===== Paths =====
BASE_PATH = Path("/home/masaki/hierarchical-repr/EntityTree/output/eval_tree_popMiddle")

# ===== Data preparation =====
df = load_llama3_8b_templates(BASE_PATH, TEMPLATE_LIST)
if df.empty:
    raise RuntimeError("Llama-3-8B の評価ファイルが見つかりません。パスとテンプレートの存在を確認してください。")

# Coerce the layer and metric columns to numbers; unparseable values become NaN.
numeric_columns = ("layer", "jrf_k1", "jrf_k2", "gtd", "gqd", "cophenetic_corr")
for column in numeric_columns:
    if column not in df.columns:
        continue
    df[column] = pd.to_numeric(df[column], errors="coerce")

# Normalized layer: a single model, so dividing by the overall maximum is enough.
max_layer = df["layer"].max()
if pd.notna(max_layer) and max_layer > 0:
    df["normalized_layer"] = df["layer"] / max_layer
else:
    df["normalized_layer"] = 0.0

# ===== Figure settings read by plot_metric_by_templates =====
METRIC_INFO = {
    "jrf_k1": ("JRF(k=1)", "↓"),
    "gtd": ("Generalized Triplet Distance", "↓"),
    "gqd": ("Generalized Quartet Distance", "↓"),
    "cophenetic_corr": ("cophenetic corr", "↑"),
}
MODEL_TITLE = "Llama-3-8B"

# ===== One figure per metric =====
fig_jrfk1 = plot_metric_by_templates(df_metric=df, metric_key="jrf_k1")
fig_gtd = plot_metric_by_templates(df_metric=df, metric_key="gtd")
fig_gqd = plot_metric_by_templates(df_metric=df, metric_key="gqd")
fig_coph = plot_metric_by_templates(df_metric=df, metric_key="cophenetic_corr")

# Show whatever was produced; a metric without data yields None and is skipped.
for fig in (fig_jrfk1, fig_gtd, fig_gqd, fig_coph):
    if fig is None:
        continue
    fig.show()
