In [None]:
import os
import json
import itertools
import numpy as np
from collections import defaultdict
from pathlib import Path
import torch
import matplotlib.pyplot as plt
from collections import deque, Counter
from scipy.stats import pearsonr
import pandas as pd

from detectron2.utils import comm
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import pairwise_iou
from scipy.optimize import linear_sum_assignment
from apted import APTED, Config
import statsmodels

In [None]:
# self._records.append(
#     dict(
#         file_name = os.path.basename(inp["file_name"]),
#         image_id  = inp["image_id"],
#         ted       = ted,
#         steds     = steds,
#         reds      = reds,
#         sim       = sim,
#         gt        = gt,
#         pred      = pred,
#         gt_tree   = gt_tree,
#         pred_tree = pred_tree,
#     )
# )

In [None]:
class TreeNode:
    """A tree node carrying an id, a label, a bounding box, a category and its children."""

    def __init__(self, id, label, bbox, category):
        # Store the four descriptive fields exactly as they were passed in.
        self.id, self.label, self.bbox, self.category = id, label, bbox, category
        # List[TreeNode]; every node starts out with its own empty child list.
        self.children = list()

In [None]:
# Create the figure output directory up front; exist_ok makes re-running this cell harmless.
os.makedirs("./figures", exist_ok=True)

In [None]:
# Order of the hyperparameter grid to evaluate
backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
methods = ["DRGG", "DRGGBBoxEmbTFEnc"]
# beam_widths = [1, 5, 10, 15, 20, 25, 30]
beam_widths = [1, 20]

# Output directory holding one sub-directory per (backbone, method) run
base_dir = "./output_20250710"

# One summary row per configuration that could be loaded
summary_records = []

# Every loaded prediction list, keyed by (backbone, method, beam_width)
all_preds_dict = {}

for backbone in backbones:
    for method in methods:
        for beam_width in beam_widths:
            file_path = os.path.join(base_dir, f"gtbbox_{backbone}_{method}", f"tree_predictions.bw{beam_width}.pt")

            if not os.path.exists(file_path):
                print(f"[WARN] Not found: {file_path}")
                continue

            try:
                # NOTE(review): torch.load unpickles arbitrary Python objects; only point
                # base_dir at prediction files that come from a trusted source.
                preds = torch.load(file_path)
                all_preds_dict[(backbone, method, beam_width)] = preds  # kept for later cells
            except Exception as e:
                print(f"[ERROR] Could not load {file_path}: {e}")
                continue

            # Average only over records that carry all three metrics.
            ted_sum = steds_sum = reds_sum = 0.0
            count = 0
            for pred in preds:
                if all(k in pred for k in ["ted", "steds", "reds"]):
                    ted_sum += pred["ted"]
                    steds_sum += pred["steds"]
                    reds_sum += pred["reds"]
                    count += 1

            if count == 0:
                continue

            # STEDS and REDS are scaled by 100 for reporting; TED is reported as-is.
            summary_records.append({
                "Backbone": backbone,
                "Method": method,
                "Beam Width": beam_width,
                "STEDS": steds_sum / count * 100,
                "REDS": reds_sum / count * 100,
                "TED": ted_sum / count,
            })

# Build and sort the summary frame. The column list is passed explicitly so that the
# frame still has its columns (and sort_values does not raise KeyError) when no
# prediction file could be loaded at all.
df = pd.DataFrame(
    summary_records,
    columns=["Backbone", "Method", "Beam Width", "STEDS", "REDS", "TED"],
)
df = df.sort_values(by=["Backbone", "Method", "Beam Width"]).reset_index(drop=True)

# Emit the summary as a LaTeX table
latex_table = df.to_latex(index=False, float_format="%.2f", column_format="lllrrr", escape=False)

print(latex_table)

In [None]:
from scipy.stats import wilcoxon

def get_valid_scores(preds, key):
    """Return ``p[key]`` for every record in ``preds`` that has all three metric keys.

    A record is used only if it contains "ted", "steds" and "reds".
    """
    return [p[key] for p in preds if all(k in p for k in ["ted", "steds", "reds"])]

def add_wilcoxon_stars_against_base(df, all_preds_dict):
    """Append ``*`` to metric values that differ significantly from the baseline.

    For each row of ``df`` the per-record scores of (Backbone, Method, Beam Width)
    are compared with those of the baseline (Backbone, "DRGG", 1) using
    ``scipy.stats.wilcoxon``. A metric cell whose p-value is below 0.05 is
    replaced by the string ``"<value>*"`` (floats use two decimals).

    Args:
        df: summary frame with columns "Backbone", "Method", "Beam Width",
            "TED", "STEDS" and "REDS".
        all_preds_dict: mapping (backbone, method, beam_width) -> list of
            prediction records (dicts holding "ted", "steds", "reds").

    Returns:
        A new DataFrame with the same rows; baseline rows and rows whose
        predictions are missing are passed through unchanged.
    """
    marked_rows = []

    for _, row in df.iterrows():
        backbone = row["Backbone"]
        method = row["Method"]
        beam_width = row["Beam Width"]

        # The baseline configuration is never compared with itself.
        if method == "DRGG" and beam_width == 1:
            marked_rows.append(row)
            continue

        key_base = (backbone, "DRGG", 1)
        key_target = (backbone, method, beam_width)

        if key_base not in all_preds_dict or key_target not in all_preds_dict:
            marked_rows.append(row)
            continue

        base_preds = all_preds_dict[key_base]
        target_preds = all_preds_dict[key_target]

        out_row = row.copy()

        for key in ["TED", "STEDS", "REDS"]:
            metric = key.lower()
            base_vals = get_valid_scores(base_preds, metric)
            target_vals = get_valid_scores(target_preds, metric)

            if len(base_vals) == 0 or len(target_vals) == 0:
                continue

            # NOTE(review): samples are paired by list position after truncating to the
            # shorter list; this assumes both lists enumerate the same images in the
            # same order -- confirm against how the prediction files are written.
            n = min(len(base_vals), len(target_vals))
            try:
                _, p_val = wilcoxon(base_vals[:n], target_vals[:n])
            except Exception as e:
                # Only a genuine failure of the test itself is reported here.
                print(f"[WARN] Wilcoxon failed for {key_base} vs {key_target}: {e}")
                continue

            # Plain conditional instead of `assert`: a non-significant result is not an
            # error, and asserts are stripped when Python runs with -O.
            if p_val < 0.05:
                val = row[key]
                out_row[key] = f"{val:.2f}*" if isinstance(val, float) else f"{val}*"

        marked_rows.append(out_row)

    df_marked = pd.DataFrame(marked_rows)
    return df_marked

# Attach the Wilcoxon significance markers, using (DRGG, beam width 1) as the baseline
df_marked = add_wilcoxon_stars_against_base(df, all_preds_dict)

def get_decoder_display_name(method: str, beam_width: int) -> str:
    """Return the decoder label used in tables for a (method, beam width) pair.

    Beam width 1 yields the plain label; any other width yields the "BS" variant.

    Raises:
        ValueError: if ``method`` is neither "DRGG" nor "DRGGBBoxEmbTFEnc".
    """
    if method == "DRGG":
        if beam_width == 1:
            return "DRGG"
        return "DRGG-BS"
    if method == "DRGGBBoxEmbTFEnc":
        if beam_width == 1:
            return "DRGG-BE"
        return "DRGG-BEBS"
    raise ValueError(f"Unknown method: {method}")

# Maps the backbone identifiers used in the output directory names to the
# names printed in the tables.
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage"
}

# Order in which the backbone groups appear in the formatted table.
backbone_display_order = [
    "ResNet-50", "ViT", "Swin", "DiT", "InternImage"
]

def get_decoder_sort_key(method: str, beam_width: int) -> int:
    """Return the row order of a decoder inside one backbone group.

    Order: DRGG (bw=1) -> 1, DRGGBBoxEmbTFEnc (bw=1) -> 2, DRGG (bw>1) -> 3,
    DRGGBBoxEmbTFEnc (bw>1) -> 4. Anything else sorts last with 99.
    """
    if method == "DRGG":
        rank = 1
    elif method == "DRGGBBoxEmbTFEnc":
        rank = 2
    else:
        return 99  # unknown method: fallback

    if beam_width == 1:
        return rank
    if beam_width > 1:
        # Beam-search variants come after both width-1 variants.
        return rank + 2
    return 99  # beam width below 1: fallback

def format_latex_table(df):
    """Render the summary frame as a booktabs ``tabular`` grouped by backbone.

    Args:
        df: frame with columns "Backbone", "Method", "Beam Width", "STEDS",
            "REDS" and "TED"; metric cells may be floats or star-marked strings.

    Returns:
        The LaTeX source as a single string. Backbones are listed in
        ``backbone_display_order``; inside a backbone, rows follow
        ``get_decoder_sort_key``. Backbones without any row are left out,
        so no empty group (and no doubled ``\\midrule``) is produced.
    """
    # Work on a copy so the helper columns are not added to the caller's frame.
    table = df.copy()
    table["BackboneDisplay"] = table["Backbone"].map(backbone_name_map)
    # Plain comprehensions (instead of apply(axis=1)) also work on an empty frame.
    table["Decoder"] = [
        get_decoder_display_name(m, bw) for m, bw in zip(table["Method"], table["Beam Width"])
    ]
    table["DecoderOrder"] = [
        get_decoder_sort_key(m, bw) for m, bw in zip(table["Method"], table["Beam Width"])
    ]

    lines = []
    lines.append(r"\begin{tabular}{ll|rrr}")
    lines.append(r"\toprule")
    lines.append(r"Backbone & Decoder & STEDS & REDS & TED \\")
    lines.append(r"\midrule")

    # Build one list of row lines per backbone that actually has data.
    group_blocks = []
    for backbone_disp in backbone_display_order:
        group = table[table["BackboneDisplay"] == backbone_disp]
        if group.empty:
            continue
        group = group.sort_values("DecoderOrder").reset_index(drop=True)

        block = []
        for j, row in group.iterrows():
            decoder = row["Decoder"]
            steds = _format_val(row["STEDS"])
            reds = _format_val(row["REDS"])
            ted = _format_val(row["TED"])

            if j == 0:
                # The first row of a group carries the backbone name spanning the group.
                block.append(rf"\multirow{{{len(group)}}}{{*}}{{{backbone_disp}}} & {decoder} & {steds} & {reds} & {ted} \\")
            else:
                block.append(rf"    & {decoder} & {steds} & {reds} & {ted} \\")
        group_blocks.append(block)

    # Separate consecutive groups with exactly one \midrule.
    for i, block in enumerate(group_blocks):
        if i > 0:
            lines.append(r"\midrule")
        lines.extend(block)

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")

    return "\n".join(lines)

def _format_val(val):
    if isinstance(val, str) and val.endswith("*"):
        return f"{val[:-1]}$^{{\\ast}}$"
    return f"{val:.2f}" if isinstance(val, float) else str(val)

# Render the star-marked summary as LaTeX and print it
latex_table = format_latex_table(df_marked)
print(latex_table)

In [None]:
import pandas as pd

# Collect the per-image STEDS of one (method, beam width) setting across all backbones
target_method = "DRGGBBoxEmbTFEnc"
target_beam_width = 20

records = []

for (backbone, method, beam_width), preds in all_preds_dict.items():
    if method == target_method and beam_width == target_beam_width:
        for pred in preds:
            # Guard on the keys that are actually read below ("file_name", not "image_id").
            if "steds" in pred and "file_name" in pred:
                records.append({
                    "File Name": pred["file_name"],
                    "STEDS": pred["steds"]
                })

# Convert to a DataFrame
# NOTE: this rebinds `df`, which earlier cells use for the summary table.
df = pd.DataFrame(records)

# Average STEDS per file name (one value per image across backbones)
df_mean = df.groupby("File Name", as_index=False).agg({"STEDS": "mean"})

# Sort ascending, so the images with the lowest STEDS come first
df_sorted = df_mean.sort_values(by="STEDS", ascending=True).reset_index(drop=True)

N = 100  # number of lowest-scoring images to list

# Take the first N rows, i.e. the N lowest mean STEDS
top_n_df = df_sorted.head(N)

# Show them
print(top_n_df.to_string())


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict

# Metrics to plot and the record key each one is stored under
metrics = ["TED", "STEDS", "REDS"]
metric_keys = {"TED": "ted", "STEDS": "steds", "REDS": "reds"}

# Display labels for the backbones
backbone_labels = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "internimage_base_4scale": "InternImage",
    "dit_base": "DiT"
}

# Hatch pattern per decoder display name
method_hatch_map = {
    "DRGG": None,
    "DRGG-BE": "//",
    "DRGG-BS": "\\\\",
    "DRGG-BEBS": "xx"
}

# NOTE: these rebind the module-level grid variables (backbones in a different
# order than the loading cell); later cells read them.
beam_widths = [1, 20]
backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "dit_base", "internimage_base_4scale"]
methods = ["DRGG", "DRGGBBoxEmbTFEnc"]

# Colour palette
color_list = plt.cm.tab20.colors

# One grouped histogram figure per metric
for m in metrics:
    label_to_values = OrderedDict()  # keeps insertion order
    label_to_hatch = OrderedDict()

    for backbone in backbones:
        backbone_display = backbone_labels.get(backbone, backbone)
        for beam_width in beam_widths:
            for method in methods:        
                key = (backbone, method, beam_width)
                if key not in all_preds_dict:
                    continue

                preds = all_preds_dict[key]
                values = []
                for pred in preds:
                    k = metric_keys[m]
                    if k in pred and pred[k] is not None:
                        # STEDS/REDS are scaled by 100; TED is used as stored
                        value = pred[k] * 100 if m in ("STEDS", "REDS") else pred[k]
                        values.append(value)

                # Decoder display name: beam width 20 selects the "BS" variant
                if method == "DRGG":
                    method_display = "DRGG-BS" if beam_width == 20 else "DRGG"
                elif method == "DRGGBBoxEmbTFEnc":
                    method_display = "DRGG-BEBS" if beam_width == 20 else "DRGG-BE"
                else:
                    method_display = method

                label = f"{backbone_display}-{method_display}"
                label_to_values[label] = values
                label_to_hatch[label] = method_hatch_map.get(method_display, None)

    # Plot layout: ten bins of width 10 over [0, 100]; each bin is shared by all series
    # NOTE(review): the same 0-100 bins are used for TED; values above 100 (if any
    # occur) would not be counted by np.histogram -- confirm the TED range.
    bins = np.arange(0, 110, 10)
    bin_width = 9
    num_labels = len(label_to_values)
    bar_width = bin_width / (num_labels + 1)

    plt.figure(figsize=(14, 6))

    for i, (label, values) in enumerate(label_to_values.items()):
        counts, _ = np.histogram(values, bins=bins)
        # Shift every series by one bar width so the bars sit side by side inside a bin
        bin_positions = bins[:-1] + i * bar_width

        plt.bar(
            bin_positions,
            counts,
            width=bar_width,
            label=label,
            color=color_list[i % len(color_list)],
            hatch=label_to_hatch[label],
            edgecolor='black',
            linewidth=0.5
        )

    plt.yscale("log", base=2)
    plt.xlabel(f"{m}", fontsize=14)
    plt.ylabel("Log-scale Frequency", fontsize=14)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.grid(True, linestyle="--", alpha=0.5)

    # Legend placement differs between the STEDS/REDS figures and the TED figure
    legend_loc = "upper left" if m in ("STEDS", "REDS") else "upper right"
    plt.legend(fontsize=10, title="Backbone-Decoder", title_fontsize=10, loc=legend_loc)

    plt.tight_layout()
    # Save each figure as both PNG and EPS under ./figures
    plt.savefig(f"./figures/{m.lower()}_histogram.png", dpi=300, bbox_inches='tight')
    plt.savefig(f"./figures/{m.lower()}_histogram.eps", format="eps", dpi=300, bbox_inches='tight')
    plt.show()


In [None]:
# import matplotlib.pyplot as plt
# import pandas as pd

# # Backbone 表示名変換
# backbone_labels = {
#     "r50_4scale": "ResNet-50",
#     "vitdet_base_4scale": "ViT",
#     "swin_base_384_4scale": "Swin",
#     "internimage_base_4scale": "InternImage",
#     "dit_base": "DiT"
# }
# backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "dit_base", "internimage_base_4scale"]

# # Method 表示名変換（BS 表示なし）
# method_display_map = {
#     "DRGG": "DRGG",
#     "DRGGBBoxEmbTFEnc": "DRGG-BE"
# }

# # 対象のビーム幅
# target_beam_widths = [1, 5, 10, 15, 20, 25, 30]
# df_plot = df[df["Beam Width"].isin(target_beam_widths)].copy()

# # 表示用列を追加
# df_plot["Backbone Display"] = df_plot["Backbone"].map(backbone_labels)
# df_plot["Method Display"] = df_plot["Method"].map(method_display_map)
# df_plot["Legend Label"] = df_plot["Backbone Display"] + "-" + df_plot["Method Display"]

# # 表示順に並べるためのキー
# ordered_labels = []
# for b in backbones:
#     b_disp = backbone_labels[b]
#     for m in ["DRGG", "DRGGBBoxEmbTFEnc"]:
#         m_disp = method_display_map[m]
#         ordered_labels.append(f"{b_disp}-{m_disp}")

# # 指標と色
# metrics = ["STEDS", "REDS", "TED"]
# colors = ["tab:blue", "tab:orange", "tab:green"]

# for metric, color in zip(metrics, colors):
#     plt.figure(figsize=(8, 5))
#     for label in ordered_labels:
#         group = df_plot[df_plot["Legend Label"] == label]
#         if group.empty:
#             continue
#         group = group.sort_values("Beam Width")
#         plt.plot(group["Beam Width"], group[metric],
#                  marker='o', label=label)

#     plt.xlabel("Beam Width", fontsize=14)
#     plt.ylabel(metric, fontsize=14)
#     plt.xticks(fontsize=14)
#     plt.yticks(fontsize=14)
#     plt.ylim(bottom=0)
#     plt.title(f"{metric} vs Beam Width", fontsize=14)
#     plt.grid(True, linestyle="--", alpha=0.6)

#     # 凡例位置調整
#     if metric in ("STEDS", "REDS"):
#         legend_loc = "lower right"
#         plt.legend(title="Backbone-Decoder", loc=legend_loc)
#     elif metric == "TED":
#         # 左下から少し右上にずらす
#         plt.legend(title="Backbone-Decoder", loc="lower left", bbox_to_anchor=(0.2, 0.0))

#     plt.tight_layout()
#     plt.savefig(f"./figures/{metric.lower()}_beamwidth.png", dpi=300, bbox_inches='tight')
#     plt.savefig(f"./figures/{metric.lower()}_beamwidth.eps", format="eps", dpi=300, bbox_inches='tight')
#     plt.show()

In [None]:
# Additional methods to evaluate on top of the ones summarised earlier
backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
methods_to_add = ["DRGGTextEmbTFEnc", "DRGGBBoxEmbTextEmbTFEnc"]
beam_widths = [1, 20]

# Summary rows for the additional methods
new_summary_records = []

for backbone in backbones:
    for method in methods_to_add:
        for beam_width in beam_widths:
            key = (backbone, method, beam_width)
            if key in all_preds_dict:
                # Already loaded by an earlier run of this cell
                preds = all_preds_dict[key]
            else:
                file_path = os.path.join(base_dir, f"gtbbox_{backbone}_{method}", f"tree_predictions.bw{beam_width}.pt")
                if not os.path.exists(file_path):
                    print(f"[WARN] Not found: {file_path}")
                    continue
                try:
                    # NOTE(review): torch.load unpickles arbitrary Python objects; only
                    # load prediction files that come from a trusted source.
                    preds = torch.load(file_path)
                    all_preds_dict[key] = preds
                except Exception as e:
                    print(f"[ERROR] Could not load {file_path}: {e}")
                    continue

            # Average only over records that carry all three metrics.
            ted_sum = steds_sum = reds_sum = 0.0
            count = 0
            for pred in preds:
                if all(k in pred for k in ["ted", "steds", "reds"]):
                    ted_sum += pred["ted"]
                    steds_sum += pred["steds"]
                    reds_sum += pred["reds"]
                    count += 1
            if count == 0:
                continue
            new_summary_records.append({
                "Backbone": backbone,
                "Method": method,
                "Beam Width": beam_width,
                "STEDS": steds_sum / count * 100,
                "REDS": reds_sum / count * 100,
                "TED": ted_sum / count,
            })

# --- Combine old and new summary rows ---
# The explicit column list keeps the frame sortable even when both lists are empty
# (otherwise sort_values raises KeyError on a column-less frame).
df_new = pd.DataFrame(
    summary_records + new_summary_records,
    columns=["Backbone", "Method", "Beam Width", "STEDS", "REDS", "TED"],
)
df_new = df_new.sort_values(by=["Backbone", "Method", "Beam Width"]).reset_index(drop=True)


In [None]:
# --- LaTeX output of the extended summary (df_new), with caption and label ---
latex_table = df_new.to_latex(
    index=False,
    float_format="%.2f",
    column_format="lllrrr",
    escape=False,
    caption="STEDS, REDS, and TED scores per Backbone, Method, and Beam Width.",
    label="tab:tree_scores_extended"
)
print("\n=== LaTeX ===")
print(latex_table)


In [None]:
from scipy.stats import wilcoxon
import pandas as pd

# === 1. Wilcoxon: mark significant differences relative to DRGG (bw=1) ===
def get_valid_scores(preds, key):
    """Collect ``record[key]`` from every record holding "ted", "steds" and "reds"."""
    required = ["ted", "steds", "reds"]
    scores = []
    for record in preds:
        if all(name in record for name in required):
            scores.append(record[key])
    return scores

def add_wilcoxon_stars_against_base(df, all_preds_dict):
    """Return a copy of ``df`` whose significant metric cells end with ``*``.

    Each row is tested against the (Backbone, "DRGG", 1) baseline with
    ``scipy.stats.wilcoxon``; a metric whose p-value is below 0.05 becomes the
    string ``"<value>*"``. Baseline rows and rows with missing predictions are
    returned unchanged.
    """
    result_rows = []

    for _, row in df.iterrows():
        key_target = (row["Backbone"], row["Method"], row["Beam Width"])
        key_base = (row["Backbone"], "DRGG", 1)

        # Pass through the baseline itself and anything that cannot be compared.
        is_baseline = row["Method"] == "DRGG" and row["Beam Width"] == 1
        if is_baseline or not (key_base in all_preds_dict and key_target in all_preds_dict):
            result_rows.append(row)
            continue

        base_records = all_preds_dict[key_base]
        target_records = all_preds_dict[key_target]
        starred = row.copy()

        for column in ("TED", "STEDS", "REDS"):
            record_key = column.lower()
            base_scores = get_valid_scores(base_records, record_key)
            target_scores = get_valid_scores(target_records, record_key)

            if not base_scores or not target_scores:
                continue

            # Both samples are cut to the shorter length and paired by position.
            paired = min(len(base_scores), len(target_scores))
            try:
                _stat, p_val = wilcoxon(base_scores[:paired], target_scores[:paired])
                if p_val < 0.05:
                    current = row[column]
                    if isinstance(current, float):
                        starred[column] = f"{current:.2f}*"
                    else:
                        starred[column] = f"{current}*"
            except Exception as e:
                print(f"[WARN] Wilcoxon failed for {key_base} vs {key_target}: {e}")
                continue

        result_rows.append(starred)

    return pd.DataFrame(result_rows)

# === 2. Display settings ===
# (method, beam width) -> decoder name printed in the table. Combinations that
# are not listed here have no display name.
decoder_display_map = {
    ("DRGG", 1): "DRGG",
    ("DRGGBBoxEmbTFEnc", 1): "DRGG-BE",
    ("DRGG", 20): "DRGG-BS",
    ("DRGGBBoxEmbTFEnc", 20): "DRGG-BEBS",
    ("DRGGTextEmbTFEnc", 1): "DRGG-TE",
    ("DRGGTextEmbTFEnc", 20): "DRGG-TEBS",
    ("DRGGBBoxEmbTextEmbTFEnc", 1): "DRGG-BETE",
    ("DRGGBBoxEmbTextEmbTFEnc", 20): "DRGG-BETEBS"
}

# Row order of the decoders inside each backbone group.
decoder_order = ["DRGG", "DRGG-BE", "DRGG-BS", "DRGG-BEBS", "DRGG-TE", "DRGG-TEBS", "DRGG-BETE", "DRGG-BETEBS"]

# Backbone identifier -> name printed in the table.
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage"
}

# Order of the backbone groups in the table.
backbone_display_order = ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]

# === 3. LaTeX テーブル整形 ===
def _format_val(val):
    if isinstance(val, str) and val.endswith("*"):
        return f"{val[:-1]}$^{{\\ast}}$"
    return f"{val:.2f}" if isinstance(val, float) else str(val)

def format_latex_table(df):
    """Render a prepared frame as a booktabs ``tabular`` grouped by backbone.

    Args:
        df: frame that already has the columns "BackboneDisplay", "Decoder",
            "DecoderOrder", "STEDS", "REDS" and "TED".

    Returns:
        The LaTeX source as one string. Groups follow ``backbone_display_order``
        and rows inside a group are sorted by "DecoderOrder". Backbones without
        rows are skipped so that no doubled or dangling ``\\midrule`` appears.
    """
    lines = []
    lines.append(r"\begin{tabular}{ll|rrr}")
    lines.append(r"\toprule")
    lines.append(r"Backbone & Decoder & STEDS & REDS & TED \\")
    lines.append(r"\midrule")

    # Collect the row lines of every backbone that actually has data.
    group_blocks = []
    for backbone_disp in backbone_display_order:
        group = df[df["BackboneDisplay"] == backbone_disp].copy()
        if group.empty:
            continue
        group = group.sort_values("DecoderOrder").reset_index(drop=True)

        block = []
        for j, row in group.iterrows():
            decoder = row["Decoder"]
            steds = _format_val(row["STEDS"])
            reds = _format_val(row["REDS"])
            ted = _format_val(row["TED"])
            if j == 0:
                # The first row of a group carries the backbone name spanning the group.
                block.append(rf"\multirow{{{len(group)}}}{{*}}{{{backbone_disp}}} & {decoder} & {steds} & {reds} & {ted} \\")
            else:
                block.append(rf"    & {decoder} & {steds} & {reds} & {ted} \\")
        group_blocks.append(block)

    # Exactly one \midrule between consecutive non-empty groups.
    for i, block in enumerate(group_blocks):
        if i > 0:
            lines.append(r"\midrule")
        lines.extend(block)

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    return "\n".join(lines)

# === 4. Run ===
# Start from df_new and keep only the rows whose (Method, Beam Width) pair has an
# entry in decoder_display_map; pairs without a display name are dropped.
df_filtered = df_new.copy()
df_filtered["Decoder"] = df_filtered.apply(
    lambda row: decoder_display_map.get((row["Method"], row["Beam Width"])), axis=1
)
df_filtered = df_filtered[df_filtered["Decoder"].notnull()].copy()
# Position of the decoder name in decoder_order, used for sorting inside a group
df_filtered["DecoderOrder"] = df_filtered["Decoder"].apply(lambda d: decoder_order.index(d))
df_filtered["BackboneDisplay"] = df_filtered["Backbone"].map(backbone_name_map)

# Add the significance markers
df_marked = add_wilcoxon_stars_against_base(df_filtered, all_preds_dict)

# LaTeX output
latex_table = format_latex_table(df_marked)
print(latex_table)


In [None]:
# # 追加メソッド
# methods_to_add = ["DRGGPositionEmbTFEnc", "DRGGClassEmbTFEnc"]
# beam_widths = [1, 20]

# # 保存用
# new_summary_records = []

# for backbone in backbones:
#     for method in methods_to_add:
#         for beam_width in beam_widths:
#             key = (backbone, method, beam_width)
#             if key in all_preds_dict:
#                 preds = all_preds_dict[key]
#             else:
#                 file_path = os.path.join(base_dir, f"gtbbox_{backbone}_{method}", f"tree_predictions.bw{beam_width}.pt")
#                 if not os.path.exists(file_path):
#                     print(f"[WARN] Not found: {file_path}")
#                     continue
#                 try:
#                     preds = torch.load(file_path)
#                     all_preds_dict[key] = preds
#                 except Exception as e:
#                     print(f"[ERROR] Could not load {file_path}: {e}")
#                     continue

#             ted_sum = steds_sum = reds_sum = 0.0
#             count = 0
#             for pred in preds:
#                 if all(k in pred for k in ["ted", "steds", "reds"]):
#                     ted_sum += pred["ted"]
#                     steds_sum += pred["steds"]
#                     reds_sum += pred["reds"]
#                     count += 1
#             if count == 0:
#                 continue
#             new_summary_records.append({
#                 "Backbone": backbone,
#                 "Method": method,
#                 "Beam Width": beam_width,
#                 "STEDS": steds_sum / count * 100,
#                 "REDS": reds_sum / count * 100,
#                 "TED": ted_sum / count,
#             })

# # --- 新旧まとめて結合・整形 ---
# df_new = pd.DataFrame(summary_records + new_summary_records)
# df_new = df_new.sort_values(by=["Backbone", "Method", "Beam Width"]).reset_index(drop=True)

# # --- LaTeX 出力 ---
# latex_table = df_new.to_latex(
#     index=False,
#     float_format="%.2f",
#     column_format="lllrrr",
#     escape=False,
#     caption="STEDS, REDS, and TED scores per Backbone, Method, and Beam Width.",
#     label="tab:tree_scores_extended"
# )
# print("\n=== LaTeX ===")
# print(latex_table)


In [None]:
from collections import defaultdict, Counter
from math import atan2, degrees
import pandas as pd
from scipy.stats import pearsonr

# --- 方向分類などの関数 ---
def dfs_all_nodes(node):
    """Return ``node`` and all of its descendants in depth-first pre-order."""
    ordered = []
    pending = [node]
    while pending:
        current = pending.pop()
        ordered.append(current)
        # Push children reversed so the first child is visited first.
        pending.extend(list(current.children)[::-1])
    return ordered

def bbox_center(bbox):
    """Return the centre ``(x, y)`` of a box given as ``(x_min, y_min, x_max, y_max)``."""
    left, top, right, bottom = bbox
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return center_x, center_y

def classify_8_directions(dx, dy):
    """Classify the displacement ``(dx, dy)`` into one of eight 45-degree sectors.

    The angle is ``atan2(dy, dx)`` in degrees, normalised to [0, 360). Labels are
    returned as in the original table: "右" around 0 degrees, then "右下", "下",
    "左下", "左", "左上", "上", "右上" for increasing angles. "不明" is returned when no
    sector matches.
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # The "右" sector wraps around 0 degrees, so it is handled on its own.
    if (337.5 <= angle < 360) or (0 <= angle < 22.5):
        return "右"

    # (sector start, label); every sector spans 45 degrees.
    sectors = (
        (22.5, "右下"),
        (67.5, "下"),
        (112.5, "左下"),
        (157.5, "左"),
        (202.5, "左上"),
        (247.5, "上"),
        (292.5, "右上"),
    )
    for start, name in sectors:
        if start <= angle < start + 45:
            return name
    return "不明"

# --- Direction categories (fixed order) ---
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]

# --- Uses the predictions loaded by earlier cells ---
# all_preds_dict[(backbone, method, beam_width)] is expected to be populated,
# e.g. all_preds_dict[("r50_4scale", "DRGG", 1)]

results = []

for backbone in backbones:
    for method in methods:
        key1 = (backbone, method, 1)
        key20 = (backbone, method, 20)

        if key1 not in all_preds_dict or key20 not in all_preds_dict:
            print(f"[WARN] Missing preds for {backbone}-{method}")
            continue

        preds1 = all_preds_dict[key1]
        preds20 = all_preds_dict[key20]

        # Index the beam-20 records by file name once, instead of scanning the whole
        # list for every beam-1 record. setdefault keeps the first record per file
        # name, which is what a first-match linear search would return.
        preds20_by_file = {}
        for p20 in preds20:
            preds20_by_file.setdefault(p20["file_name"], p20)

        # --- REDS difference (beam 20 minus beam 1) per file ---
        file_to_reds_diff = {}
        for p1 in preds1:
            if "reds" not in p1:
                # No score on the beam-1 side: nothing to compare for this image.
                continue
            fname = p1["file_name"]
            reds1 = p1["reds"]
            p20 = preds20_by_file.get(fname)
            if p20 is not None and "reds" in p20:
                reds20 = p20["reds"]
                file_to_reds_diff[fname] = reds20 - reds1

        # --- Count transition directions (8 sectors) between consecutive nodes ---
        file_to_direction_counts = defaultdict(Counter)

        for p1 in preds1:
            fname = p1["file_name"]
            if fname not in file_to_reds_diff:
                continue
            root = p1["pred_tree"]
            ordered_nodes = dfs_all_nodes(root)
            # Nodes with category -1 are left out of the sequence.
            nodes = [n for n in ordered_nodes if n.category != -1]

            for i in range(len(nodes) - 1):
                x1, y1 = bbox_center(nodes[i].bbox)
                x2, y2 = bbox_center(nodes[i + 1].bbox)
                dx, dy = x2 - x1, y2 - y1

                # Identical centres have no direction.
                if dx == 0 and dy == 0:
                    continue

                direction = classify_8_directions(dx, dy)
                if direction != "不明":
                    file_to_direction_counts[fname][direction] += 1

        # --- Build the DataFrames ---
        df_direction = pd.DataFrame.from_dict(file_to_direction_counts, orient="index").fillna(0)
        # Guarantee one column per direction (zero-filled) so a direction that never
        # occurs does not raise KeyError in the correlation loop below.
        df_direction = df_direction.reindex(columns=direction_types, fill_value=0)
        df_reds = pd.DataFrame.from_dict(file_to_reds_diff, orient="index", columns=["REDS_diff"])
        df_combined = df_direction.join(df_reds).dropna()

        # pearsonr needs at least two paired observations.
        if len(df_combined) < 2:
            print(f"[WARN] Not enough paired images for {backbone}-{method}; skipping correlation")
            continue

        # --- Correlation between direction counts and the REDS difference ---
        reds = df_combined["REDS_diff"].values
        for direction in direction_types:
            counts = df_combined[direction].values
            r, p = pearsonr(counts, reds)
            results.append({
                "Backbone": backbone,
                "Method": method,
                "Direction": direction,
                "Pearson r": r,
                "p-value": p
            })

# --- Collect all results ---
df_corr_all = pd.DataFrame(results)

# print("=== Correlation Results ===")
# print(df_corr_all.to_string(index=False))

# latex = df_corr_all.to_latex(
#     index=False,
#     float_format="%.4f",
#     caption="Correlation between REDS difference (Beam 1→20) and directional transitions per image, by backbone and method.",
#     label="tab:reds_direction_corr_all"
# )
# print("\n=== LaTeX ===")
# print(latex)

In [None]:
# --- r†形式で整形 ---
def format_r_dagger(row):
    """Format ``row["Pearson r"]`` with three decimals, adding a dagger when p < 0.001."""
    formatted = "{:.3f}".format(row["Pearson r"])
    highly_significant = row["p-value"] < 0.001
    return formatted + "$\\dagger$" if highly_significant else formatted

# Formatted correlation string per (backbone, method, direction)
df_corr_all["r(p)"] = df_corr_all.apply(format_r_dagger, axis=1)

# --- Pivot: one column per direction ---
df_pivot = df_corr_all.pivot(index=["Backbone", "Method"], columns="Direction", values="r(p)").reset_index()

# --- Explicit column order ---
# NOTE: selecting these columns raises KeyError if a direction is absent from df_corr_all.
ordered_cols = ["Backbone", "Method", "右", "右下", "下", "左下", "左", "左上", "上", "右上"]
df_pivot = df_pivot[ordered_cols]

# --- Write out as LaTeX ---
latex_table = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="ll" + "c" * 8,
    caption=r"Correlation between REDS difference (Beam 1$\to$20) and directional transition counts. Pearson $r$ shown; $p < 0.001$ marked with $\dagger$.",
    label="tab:reds_direction_compact",
    multicolumn=True,
    multicolumn_format='c',
    longtable=False
)

print(latex_table)


In [None]:
from collections import defaultdict
import pandas as pd
from math import atan2, degrees
from statsmodels.stats.contingency_tables import mcnemar

def dfs_all_nodes(node):
    """List ``node`` followed by every descendant, visiting children depth-first in order."""
    descendants = [
        found
        for child in node.children
        for found in dfs_all_nodes(child)
    ]
    return [node] + descendants

def bbox_center(bbox):
    """Compute the midpoint of an ``(x_min, y_min, x_max, y_max)`` box as an ``(x, y)`` tuple."""
    (x0, y0, x1, y1) = bbox
    # Pair each axis' lower bound with its upper bound and average them.
    return tuple((low + high) / 2 for low, high in ((x0, x1), (y0, y1)))

def classify_8_directions(dx, dy):
    """Map the vector ``(dx, dy)`` to one of eight direction labels.

    The angle ``atan2(dy, dx)`` is converted to degrees in [0, 360) and matched
    against 45-degree sectors centred on multiples of 45. "不明" is the fallback
    when the angle matches no sector.
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # Sector boundaries and the labels of the sectors between consecutive boundaries.
    bounds = [22.5, 67.5, 112.5, 157.5, 202.5, 247.5, 292.5, 337.5]
    names = ["右下", "下", "左下", "左", "左上", "上", "右上"]

    # The "右" sector straddles 0 degrees and therefore has two ranges.
    if 337.5 <= angle < 360:
        return "右"
    if 0 <= angle < 22.5:
        return "右"

    for low, high, name in zip(bounds, bounds[1:], names):
        if low <= angle < high:
            return name
    return "不明"

# Direction labels in their fixed reporting order
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
# One record per (backbone, method, direction); NOTE: rebinds `records` from an earlier cell.
records = []

for backbone in backbones:
    for method in methods:
        key1 = (backbone, method, 1)
        key20 = (backbone, method, 20)

        if key1 not in all_preds_dict or key20 not in all_preds_dict:
            print(f"[WARN] Missing: {key1} or {key20}")
            continue

        preds1 = all_preds_dict[key1]
        preds20 = all_preds_dict[key20]

        # Index both prediction lists by file name and keep the files present in both
        map1 = {p["file_name"]: p for p in preds1}
        map20 = {p["file_name"]: p for p in preds20}
        common_files = set(map1) & set(map20)

        # Per-direction 2x2 counts: A = both correct, B = only beam 1 correct,
        # C = only beam 20 correct, D = both wrong
        direction_stats = {d: {"A": 0, "B": 0, "C": 0, "D": 0} for d in direction_types}

        for fname in common_files:
            gt = map1[fname]["gt_tree"]
            pred1 = map1[fname]["pred_tree"]
            pred20 = map20[fname]["pred_tree"]

            # Ground-truth DFS sequence without category -1 nodes; need at least one transition
            gt_nodes = [n for n in dfs_all_nodes(gt) if n.category != -1]
            if len(gt_nodes) < 2:
                continue

            pred1_nodes = [n for n in dfs_all_nodes(pred1) if n.category != -1]
            pred20_nodes = [n for n in dfs_all_nodes(pred20) if n.category != -1]
            # Consecutive (label, label) pairs of each predicted DFS sequence
            # NOTE(review): edges are identified by node labels; this assumes labels are
            # unique within a tree -- confirm.
            pred1_edges = set((n1.label, n2.label) for n1, n2 in zip(pred1_nodes, pred1_nodes[1:]))
            pred20_edges = set((n1.label, n2.label) for n1, n2 in zip(pred20_nodes, pred20_nodes[1:]))

            for i in range(len(gt_nodes) - 1):
                n1, n2 = gt_nodes[i], gt_nodes[i + 1]
                x1, y1 = bbox_center(n1.bbox)
                x2, y2 = bbox_center(n2.bbox)
                dx, dy = x2 - x1, y2 - y1
                # Direction of the ground-truth transition between the two box centres
                direction = classify_8_directions(dx, dy)
                if direction not in direction_types:
                    continue

                # A ground-truth transition counts as correct when the same pair appears
                # among the predicted consecutive pairs
                edge = (n1.label, n2.label)
                correct1 = edge in pred1_edges
                correct20 = edge in pred20_edges

                if correct1 and correct20:
                    direction_stats[direction]["A"] += 1
                elif correct1 and not correct20:
                    direction_stats[direction]["B"] += 1
                elif not correct1 and correct20:
                    direction_stats[direction]["C"] += 1
                else:
                    direction_stats[direction]["D"] += 1

        # Summarise accuracy and significance per direction
        for d in direction_types:
            A = direction_stats[d]["A"]
            B = direction_stats[d]["B"]
            C = direction_stats[d]["C"]
            D = direction_stats[d]["D"]
            total = A + B + C + D
            if total == 0:
                continue
            acc1 = (A + B) / total
            acc20 = (A + C) / total
            delta = acc20 - acc1

            # McNemar test on the 2x2 table
            try:
                table = [[A, B], [C, D]]
                res = mcnemar(table, exact=True)
                p_val = res.pvalue
            except Exception as e:
                # Keep the row but without a p-value when the test cannot be computed
                p_val = None
                print(f"[WARN] McNemar failed: {backbone}, {method}, {d}: {e}")

            records.append({
                "Backbone": backbone,
                "Method": method,
                "Direction": d,
                "Accuracy@1": acc1,
                "Accuracy@20": acc20,
                "ΔAccuracy": delta,
                "p-value": p_val
            })

# --- Build the DataFrame ---
# NOTE: rebinds `df`; later cells read this per-direction frame.
df = pd.DataFrame(records)
# Cell text "acc@20 (+delta)" in percent; bold when the McNemar p-value is below 0.05
df["acc_str"] = df.apply(
    lambda row: (
        f"\\textbf{{{row['Accuracy@20']*100:.1f}}} ({row['ΔAccuracy']*100:+.1f})"
        if row["p-value"] is not None and row["p-value"] < 0.05
        else f"{row['Accuracy@20']*100:.1f} ({row['ΔAccuracy']*100:+.1f})"
    ), axis=1
)

# --- Pivot: one column per direction ---
df_pivot = df.pivot(index=["Backbone", "Method"], columns="Direction", values="acc_str").reset_index()
ordered_cols = ["Backbone", "Method"] + direction_types
# Add an empty column for any direction that produced no record
for d in direction_types:
    if d not in df_pivot.columns:
        df_pivot[d] = ""
df_pivot = df_pivot[ordered_cols]

# --- Display ---
# print("\n=== 読み順方向別 accuracy@20 (+Δ) [p<.05 で太字] ===")
# print(df_pivot.to_string(index=False))

# --- LaTeX output (built here; the print below is commented out) ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="ll" + "c" * len(direction_types),
    caption=r"Accuracy@20 per DFS reading-order direction, with improvement from Beam 1. Values in bold are statistically significant ($p < 0.05$) under McNemar's test.",
    label="tab:reading_order_directional_accuracy"
)
# print("\n=== LaTeX ===")
# print(latex)

def print_significant_directions(df):
    """Print, for every (Backbone, Method) group, the directions with p < 0.05.

    Args:
        df: DataFrame with "Backbone", "Method", "Direction" and "p-value"
            columns. A missing p-value (None / NaN) is treated as not significant.

    Each group prints one line: one check mark per significant direction
    followed by the direction names, or a cross and "(なし)" when there is none.
    """
    print("\n=== 有意差ありの方向 (p < 0.05) ===")
    for (backbone, method), group in df.groupby(["Backbone", "Method"]):
        sig_dirs = [
            direction
            for direction, p in zip(group["Direction"], group["p-value"])
            if p is not None and p < 0.05
        ]
        if sig_dirs:
            mark, listing = "✔" * len(sig_dirs), ", ".join(sig_dirs)
        else:
            mark, listing = "×", "(なし)"
        print(f"[{backbone:>25} | {method:<20}] {mark} {listing}")

# Report, per (Backbone, Method), which directions have p < 0.05 in df.
print_significant_directions(df)

In [None]:
# Display names for the backbone identifiers
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage",
}

# Direction display order: (direction key stored in df, English column header)
direction_display_order = [
    ("右", "Right"),
    ("右下", "Bottom-Right"),
    ("下", "Bottom"),
    ("左下", "Bottom-Left"),
    ("左", "Left"),
    ("左上", "Top-Left"),
    ("上", "Top"),
    ("右上", "Top-Right"),
]

# Keep only DRGG-BEBS rows (Method == "DRGGBBoxEmbTFEnc")
df_bebs = df.loc[df["Method"] == "DRGGBBoxEmbTFEnc"].copy()
df_bebs["BackboneDisplay"] = df_bebs["Backbone"].map(backbone_name_map)


def _bebs_direction_cell(matches):
    """Format the cell of one direction from its (possibly empty) rows.

    Uses the first matching row: "acc@20\\ (delta)", followed by a star
    superscript when the p-value is present and below 0.05.
    """
    if matches.empty:
        return ""  # no data for this direction
    acc20 = matches["Accuracy@20"].values[0] * 100
    delta = matches["ΔAccuracy"].values[0] * 100
    has_p = matches["p-value"].notna().values[0]
    sig = bool(has_p) and matches["p-value"].values[0] < 0.05
    star = r"$^\star$" if sig else ""
    return f"{acc20:.1f}\\ ({delta:+.1f}){star}"


# Assemble the LaTeX tabular
header_cells = ["Backbone"] + [english for _, english in direction_display_order]
header = (
    " & ".join(rf"\multicolumn{{1}}{{l}}{{{cell}}}" for cell in header_cells)
    + r" \\"
)
lines = [r"\begin{tabular}{l|rrrrrrrr}", r"\toprule", header, r"\midrule"]

for bname in ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]:
    rows = df_bebs[df_bebs["BackboneDisplay"] == bname]
    if rows.empty:
        continue
    values = [
        _bebs_direction_cell(rows[rows["Direction"] == jp_name])
        for jp_name, _ in direction_display_order
    ]
    lines.append(f"{bname:<15} & " + " & ".join(values) + r" \\")

lines += [r"\bottomrule", r"\end{tabular}"]

# Output
print("\n".join(lines))

In [None]:
# Display-name mapping: backbone identifier -> label printed in the tables
backbone_name_map = dict(
    r50_4scale="ResNet-50",
    vitdet_base_4scale="ViT",
    swin_base_384_4scale="Swin",
    dit_base="DiT",
    internimage_base_4scale="InternImage",
)

# Display order of the directions, each paired with its English label
direction_display_order = list(zip(
    ["右", "右下", "下", "左下", "左", "左上", "上", "右上"],
    ["Right", "Bottom-Right", "Bottom", "Bottom-Left",
     "Left", "Top-Left", "Top", "Top-Right"],
))

# Method と Beam Width から Decoder 表示名を決定
def get_decoder_name(method: str, beam_width: int) -> str:
    if method == "DRGG" and beam_width == 20:
        return "DRGG-BS"
    elif method == "DRGGBBoxEmbTFEnc" and beam_width == 20:
        return "DRGG-BEBS"
    else:
        return None  # 無視対象

# Add the decoder display name and the backbone display name to df (source data).
# df carries no beam-width column, so every row is labelled as a beam-20 result.
df["Decoder"] = df["Method"].map(lambda method_name: get_decoder_name(method_name, 20))
df["BackboneDisplay"] = df["Backbone"].map(backbone_name_map)
df_filtered = df[df["Decoder"].notnull()].copy()

# Build the LaTeX rows
lines = []
lines.append(r"\begin{tabular}{ll|rrrrrrrr}")
lines.append(r"\toprule")
header = (
    r"\multicolumn{1}{l}{Backbone} & " +
    r"\multicolumn{1}{l}{Decoder} & " +
    " & ".join([rf"\multicolumn{{1}}{{l}}{{{label}}}" for _, label in direction_display_order]) +
    r" \\"
)
lines.append(header)
lines.append(r"\midrule")

decoders = ["DRGG-BS", "DRGG-BEBS"]
backbone_blocks = []  # one list of LaTeX rows per backbone that has data

for bname in ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]:
    group = df_filtered[df_filtered["BackboneDisplay"] == bname]
    if group.empty:
        continue

    # Decoders that actually have rows for this backbone, in display order.
    present_decoders = [dec for dec in decoders if (group["Decoder"] == dec).any()]
    block_rows = []
    for i, decoder in enumerate(present_decoders):
        row_data = group[group["Decoder"] == decoder]
        vals = []
        for jp_dir, _ in direction_display_order:
            row = row_data[row_data["Direction"] == jp_dir]
            if row.empty:
                vals.append("")
                continue
            acc = row["Accuracy@20"].values[0] * 100
            delta = row["ΔAccuracy"].values[0] * 100
            sig = row["p-value"].values[0] < 0.05 if row["p-value"].notna().values[0] else False
            delta_str = f"({delta:+.1f})" + (r"$^\star$" if sig else "")
            vals.append(f"{acc:.1f} {delta_str}")

        if i == 0:
            # First row of the backbone: show the name, spanning exactly the rows emitted,
            # so the name is never lost when the first decoder in `decoders` has no data.
            block_rows.append(
                rf"\multirow{{{len(present_decoders)}}}{{*}}{{{bname}}} & {decoder} & "
                + " & ".join(vals) + r" \\"
            )
        else:
            # Following rows: leave the backbone cell blank and start with "&"
            block_rows.append(rf"    & {decoder} & " + " & ".join(vals) + r" \\")

    backbone_blocks.append(block_rows)

# Separate consecutive backbones with \midrule; nothing is emitted after the last
# block, whichever backbone that happens to be.
for block_idx, block_rows in enumerate(backbone_blocks):
    if block_idx > 0:
        lines.append(r"\midrule")
    lines.extend(block_rows)

lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")
print("\n".join(lines))


In [None]:
from collections import defaultdict, Counter
import pandas as pd

# Japanese -> English direction labels (insertion order is the display order)
jp_to_en = dict([
    ("右", "Right"),
    ("右下", "Bottom-Right"),
    ("下", "Bottom"),
    ("左下", "Bottom-Left"),
    ("左", "Left"),
    ("左上", "Top-Left"),
    ("上", "Top"),
    ("右上", "Top-Right"),
])
direction_types = [*jp_to_en]
direction_types_en = list(jp_to_en.values())

# Per-backbone confusion-matrix DataFrames are stored here
confusion_counts_per_backbone = {}

backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
methods = ["DRGGBBoxEmbTFEnc"]

mispred_rates = []  # misprediction rate recorded for each backbone

def truncate_tree_at_node(root: TreeNode, stop_label: str) -> TreeNode | None:
    """
    Copy `root` in DFS (pre-order) up to, but excluding, the first node whose
    label equals `stop_label`.

    Nodes visited before the stop node are copied into fresh TreeNode objects
    (id, label, bbox and category are shared with the originals); the stop node
    and everything that DFS would visit after it are dropped. Returns None when
    the root itself is the stop node.
    """
    state = {"stopped": False}  # shared flag: flips once the stop node is reached

    def copy_until_stop(node: TreeNode) -> TreeNode | None:
        if state["stopped"]:
            return None
        if node.label == stop_label:
            state["stopped"] = True
            return None

        clone = TreeNode(node.id, node.label, node.bbox, node.category)
        for child in node.children:
            child_clone = copy_until_stop(child)
            if child_clone:
                clone.children.append(child_clone)
            if state["stopped"]:
                # The stop node was found somewhere below this child:
                # later siblings come after it in DFS order, so skip them.
                break
        return clone

    return copy_until_stop(root)

# For every backbone, walk the GT reading order (consecutive DFS pairs) and compare
# the successor of each GT node with the successor predicted at beam width 20.
# Mispredicted successors are tallied in an 8-direction confusion matrix
# (GT direction x predicted direction) and additionally classified by whether the
# correct successor already appears in the predicted tree before the anchor node.
# next_status_counts[backbone] = [count where n2 is NOT yet in the partial tree,
#                                 count where n2 IS already in the partial tree]
next_status_counts = defaultdict(lambda: [0, 0])

for backbone in backbones:
    confusion_counts = defaultdict(Counter)
    correct_preds = 0
    mispreds = 0

    for method in methods:
        key20 = (backbone, method, 20)
        if key20 not in all_preds_dict:
            print(f"[WARN] Missing: {key20}")
            continue

        preds20 = all_preds_dict[key20]
        for entry in preds20:
            gt = entry["gt_tree"]
            pred = entry["pred_tree"]

            # Nodes with category == -1 are excluded (presumably the synthetic root — TODO confirm).
            gt_nodes = [n for n in dfs_all_nodes(gt) if n.category != -1]
            pred_nodes = [n for n in dfs_all_nodes(pred) if n.category != -1]
            # NOTE(review): pred_id2node is never read in this cell.
            pred_id2node = {n.label: n for n in pred_nodes}

            # Map each predicted node label to the node that follows it in predicted DFS order.
            pred_edges = list(zip(pred_nodes[:-1], pred_nodes[1:]))
            pred_next_map = {n1.label: n2 for n1, n2 in pred_edges}

            for i in range(len(gt_nodes) - 1):
                n1, n2 = gt_nodes[i], gt_nodes[i + 1]
                x1, y1 = bbox_center(n1.bbox)
                x2, y2 = bbox_center(n2.bbox)
                true_dir = classify_8_directions(x2 - x1, y2 - y1)

                # Skip anchors that have no predicted successor (absent from, or last in, the prediction).
                pred_next = pred_next_map.get(n1.label, None)
                if pred_next is None:
                    continue

                if pred_next.label == n2.label:
                    correct_preds += 1
                    continue

                # Direction from the anchor to the (wrong) predicted successor.
                px, py = bbox_center(pred_next.bbox)
                pred_dir = classify_8_directions(px - x1, py - y1)

                confusion_counts[true_dir][pred_dir] += 1
                mispreds += 1

                # Build the partial tree: the predicted tree cut just before the anchor n1.
                # NOTE(review): truncate_tree_at_node returns None when the root carries
                # n1.label; whether dfs_all_nodes accepts None is not visible here.
                partial_tree = truncate_tree_at_node(pred, n1.label)
                all_labels_in_tree = set(n.label for n in dfs_all_nodes(partial_tree))

                if n2.label not in all_labels_in_tree:
                    next_status_counts[backbone][0] += 1  # selectable
                else:
                    next_status_counts[backbone][1] += 1  # not selectable

    # Store the confusion matrix of this backbone (rows: GT direction, columns: predicted direction).
    # The reindex below introduces NaN rows for GT directions that never occurred.
    df = pd.DataFrame.from_dict(confusion_counts, orient="index", columns=direction_types).fillna(0).astype(int)
    df = df.reindex(index=direction_types, columns=direction_types)
    df.index = [jp_to_en[idx] for idx in df.index]
    df.columns = [jp_to_en[col] for col in df.columns]
    confusion_counts_per_backbone[backbone] = df

    total = correct_preds + mispreds
    if total > 0:
        rate = 100 * mispreds / total
        mispred_rates.append(rate)
        print(f"[{backbone}] mispredicted割合: {rate:.2f}%  ({mispreds} / {total})")
    else:
        print(f"[{backbone}] 有効な方向予測が存在しません")


# Row-normalise each backbone's matrix and average the normalised matrices.
# A backbone whose predictions were missing still contributes an all-zero matrix.
normalized_dfs = []
for df in confusion_counts_per_backbone.values():
    normalized = df.div(df.sum(axis=1), axis=0).fillna(0)
    normalized_dfs.append(normalized)

mean_confusion = sum(normalized_dfs) / len(normalized_dfs)

# # Output
# print("\n=== 読み順方向混同行列の割合平均（GT: 縦軸, beam20予測: 横軸）===")
# print(mean_confusion.round(3))

# Print the misprediction rate averaged over backbones
if mispred_rates:
    mean_rate = sum(mispred_rates) / len(mispred_rates)
    print(f"\n=== mispredicted割合（backbone平均）: {mean_rate:.2f}% ===")
else:
    print("\n=== mispredicted割合の平均を計算できません（データなし） ===")

# Per-backbone breakdown of the two next_status_counts classes, then their average.
print("\n=== 正解次ノードの構築時点の分類（backboneごと） ===")
avg_ratios = [0, 0]
valid_backbones = 0
for backbone, (selectable, not_selectable) in next_status_counts.items():
    total = selectable + not_selectable
    if total == 0:
        continue
    r1 = selectable / total * 100
    r2 = not_selectable / total * 100
    print(f"[{backbone}]")
    print(f"  1. 選択可能  : {selectable} ({r1:.2f}%)")
    print(f"  2. 選択不可能: {not_selectable} ({r2:.2f}%)")
    avg_ratios[0] += r1
    avg_ratios[1] += r2
    valid_backbones += 1

if valid_backbones > 0:
    print("\n=== 分類割合（backbone平均） ===")
    print(f"  1. 選択可能: {avg_ratios[0]/valid_backbones:.2f}%")
    print(f"  2. 選択不可能: {avg_ratios[1]/valid_backbones:.2f}%")
else:
    print("\n=== データなし：backbone 平均は計算不可 ===")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# English tick labels for the eight directions
direction_types_en = ["Right", "Bottom-Right", "Bottom", "Bottom-Left",
                      "Left", "Top-Left", "Top", "Top-Right"]

# Heatmap of the backbone-averaged, row-normalised confusion matrix
plt.figure(figsize=(10, 4))
heatmap_kwargs = dict(
    # annot=True,
    # fmt=".2f",
    cmap="Blues",
    cbar=True,
    xticklabels=direction_types_en,
    yticklabels=direction_types_en,
)
ax = sns.heatmap(mean_confusion, **heatmap_kwargs)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)
plt.xlabel("Direction of Mispredicted BBox", fontsize=18)
plt.ylabel("Direction of GT BBox", fontsize=18)
plt.xticks(rotation=0, fontsize=10)
plt.yticks(rotation=0, fontsize=18)
# plt.title("Average Confusion Matrix of Reading Order Directions (beam20)")
plt.tight_layout()
# Save the figure as PNG and EPS before showing it
for out_path, extra_kwargs in (
    ("./figures/reading_order_error_direction.png", {}),
    ("./figures/reading_order_error_direction.eps", {"format": "eps"}),
):
    plt.savefig(out_path, dpi=300, bbox_inches='tight', **extra_kwargs)
plt.show()

In [None]:
from collections import defaultdict, Counter
from scipy.stats import pearsonr

# --- Helper returning the edge and centre coordinates of a bbox ---
def bbox_edges(bbox):
    """Return the edges and centre of an (x_min, y_min, x_max, y_max) bbox.

    The result maps "left"/"right" to the x extremes, "top"/"bottom" to
    y_min / y_max, and "center_x"/"center_y" to the midpoints.
    """
    left, top, right, bottom = bbox
    return dict(
        left=left,
        right=right,
        top=top,
        bottom=bottom,
        center_x=(left + right) / 2,
        center_y=(top + bottom) / 2,
    )

# --- Pattern keys: "axis:direction of 1st step→direction of 2nd step" ---
# e.g. "上下:下→上" = the reading order first moves down, then back up.
ud_keys = ["上下:下→上", "上下:上→下"]
lr_keys = ["左右:右→左", "左右:左→右"]

results = []

for backbone in backbones:
    for method in methods:
        key1 = (backbone, method, 1)
        key20 = (backbone, method, 20)

        if key1 not in all_preds_dict or key20 not in all_preds_dict:
            print(f"[WARN] Missing preds for {backbone}-{method}")
            continue

        preds1 = all_preds_dict[key1]
        preds20 = all_preds_dict[key20]

        # Index the beam-20 records by file name once instead of scanning the whole
        # list for every beam-1 record. setdefault keeps the first record per name.
        preds20_by_file = {}
        for entry20 in preds20:
            preds20_by_file.setdefault(entry20["file_name"], entry20)

        # --- REDS difference per file (beam 20 minus beam 1) ---
        file_to_reds_diff = {}
        for p1 in preds1:
            fname = p1["file_name"]
            reds1 = p1["reds"]
            p20 = preds20_by_file.get(fname)
            if p20 and "reds" in p20:
                file_to_reds_diff[fname] = p20["reds"] - reds1

        # --- Count backward patterns in the beam-1 predicted reading order ---
        # NOTE(review): a file only gets an entry here once at least one pattern is
        # counted, so files without any backward pattern are left out of the
        # correlation below — confirm this is intended.
        file_to_backward_counts = defaultdict(Counter)
        for p1 in preds1:
            fname = p1["file_name"]
            if fname not in file_to_reds_diff or "pred_tree" not in p1:
                continue

            root = p1["pred_tree"]
            ordered = dfs_all_nodes(root)
            nodes = [n for n in ordered if n.category != -1]
            if len(nodes) < 3:
                continue

            for i in range(len(nodes) - 2):
                n1, n2, n3 = nodes[i], nodes[i+1], nodes[i+2]
                b1, b2, b3 = bbox_edges(n1.bbox), bbox_edges(n2.bbox), bbox_edges(n3.bbox)

                dx1 = b2["center_x"] - b1["center_x"]
                dx2 = b3["center_x"] - b2["center_x"]
                dy1 = b2["center_y"] - b1["center_y"]
                dy2 = b3["center_y"] - b2["center_y"]

                # Vertical: each step must clear the previous box's edge, not just its centre.
                if dy1 > 0 and dy2 < 0 and b2["center_y"] > b1["bottom"] and b3["center_y"] < b2["top"]:
                    file_to_backward_counts[fname]["上下:下→上"] += 1
                elif dy1 < 0 and dy2 > 0 and b2["center_y"] < b1["top"] and b3["center_y"] > b2["bottom"]:
                    file_to_backward_counts[fname]["上下:上→下"] += 1
                # Horizontal: labelled with the same "1st step→2nd step" convention as the
                # vertical case (step right then left = "右→左"); the two labels were
                # previously swapped relative to that convention.
                if dx1 > 0 and dx2 < 0 and b2["center_x"] > b1["right"] and b3["center_x"] < b2["left"]:
                    file_to_backward_counts[fname]["左右:右→左"] += 1
                elif dx1 < 0 and dx2 > 0 and b2["center_x"] < b1["left"] and b3["center_x"] > b2["right"]:
                    file_to_backward_counts[fname]["左右:左→右"] += 1

        # --- Total backward count per file ---
        for fname in file_to_backward_counts:
            total = sum(file_to_backward_counts[fname].get(p, 0) for p in ud_keys + lr_keys)
            file_to_backward_counts[fname]["総バックワード"] = total

        # --- Pearson correlation between pattern counts and REDS difference ---
        for pattern in ud_keys + lr_keys + ["総バックワード"]:
            reds, counts = [], []
            for fname in file_to_reds_diff:
                if fname in file_to_backward_counts:
                    reds.append(file_to_reds_diff[fname])
                    counts.append(file_to_backward_counts[fname].get(pattern, 0))
            # pearsonr needs at least two samples
            if len(reds) >= 2:
                r, p = pearsonr(counts, reds)
                results.append({
                    "Backbone": backbone,
                    "Method": method,
                    "Pattern": pattern,
                    "Pearson r": r,
                    "p-value": p
                })

# --- Result DataFrame ---
df_corr = pd.DataFrame(results)

# print("=== 相関（バックワードパターン） ===")
# print(df_corr.to_string(index=False))

# # --- LaTeX 出力 ---
# latex = df_corr.to_latex(
#     index=False,
#     float_format="%.4f",
#     caption="Correlation between REDS difference (Beam 1→20) and backward reading patterns per image.",
#     label="tab:reds_backward_corr"
# )
# print("\n=== LaTeX 出力 ===")
# print(latex)


In [None]:
# --- Format a correlation as "r" with an optional dagger ---
def format_r_dagger(row):
    """Return Pearson r with three decimals, plus "$\\dagger$" when p-value < 0.001.

    `row` is any mapping with "Pearson r" and "p-value" entries.
    """
    text = f"{row['Pearson r']:.3f}"
    if row["p-value"] < 0.001:
        text = f"{text}$\\dagger$"
    return text

df_corr["r(p)"] = df_corr.apply(format_r_dagger, axis=1)

# --- Pivot: one row per (Backbone, Method), one column per pattern ---
df_pivot = df_corr.pivot(
    index=["Backbone", "Method"],
    columns="Pattern",
    values="r(p)",
).reset_index()

# --- Explicit column order (two direction pairs, then the total) ---
ordered_cols = [
    "Backbone", "Method",
    "上下:下→上", "上下:上→下",
    "左右:右→左", "左右:左→右",
    "総バックワード",
]

# Add any column that is missing from the pivot (guards against skipped evaluations)
for col in [name for name in ordered_cols if name not in df_pivot.columns]:
    df_pivot[col] = ""

df_pivot = df_pivot[ordered_cols]

# --- Write the LaTeX table ---
latex_kwargs = dict(
    index=False,
    escape=False,
    column_format="ll" + "r" * (len(ordered_cols) - 2),
    caption=r"Correlation between REDS difference (Beam 1$\to$20) and backward reading patterns. Pearson $r$ shown; $p < 0.001$ marked with $\dagger$.",
    label="tab:reds_backward_compact",
    multicolumn=True,
    multicolumn_format='c',
    longtable=False,
)
latex_table = df_pivot.to_latex(**latex_kwargs)

print(latex_table)


In [None]:
from collections import defaultdict
import pandas as pd

# Backward-pattern classification keys (vertical and horizontal)
pattern_keys = [
    "上下:下-上", "上下:上-下", "上下:その他",
    "左右:右-左", "左右:左-右", "左右:その他"
]

records = []

backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
methods = ["DRGG", "DRGGBBoxEmbTFEnc"]
beam_widths = [1, 20]


def _content_nodes(tree):
    """DFS-ordered nodes of `tree`, without the nodes whose category is -1."""
    return [n for n in dfs_all_nodes(tree) if n.category != -1]


def _consecutive_label_pairs(nodes):
    """Set of (label, next label) pairs of consecutive nodes."""
    return set((a.label, b.label) for a, b in zip(nodes, nodes[1:]))


def _triplet_axis_key(b1, b2, b3, center, low, high, fwd_back_key, back_fwd_key, other_key):
    """Classify a triplet along one axis from its three bbox_edges() dicts.

    `center`, `low` and `high` are the dict keys of the axis (e.g. "center_y",
    "top", "bottom"). `fwd_back_key` is returned when the centre first increases
    past b1's `high` edge and then drops below b2's `low` edge; `back_fwd_key`
    for the mirrored case; `other_key` otherwise.
    """
    step1 = b2[center] - b1[center]
    step2 = b3[center] - b2[center]
    if step1 > 0 and step2 < 0 and b2[center] > b1[high] and b3[center] < b2[low]:
        return fwd_back_key
    if step1 < 0 and step2 > 0 and b2[center] < b1[low] and b3[center] > b2[high]:
        return back_fwd_key
    return other_key


def _format_triplet_cell(total, c1, c20):
    """"acc@20 (delta vs acc@1)" in percent, or "" when there are no samples."""
    if total == 0:
        return ""
    acc1 = c1 / total * 100
    acc20 = c20 / total * 100
    delta = acc20 - acc1
    return f"{acc20:.1f} ({delta:+.1f})"


for backbone in backbones:
    for method in methods:
        key1, key20 = (backbone, method, 1), (backbone, method, 20)

        if not (key1 in all_preds_dict and key20 in all_preds_dict):
            print(f"[WARN] Missing: {key1} or {key20}")
            continue

        preds1 = all_preds_dict[key1]
        preds20 = all_preds_dict[key20]

        map1 = {p["file_name"]: p for p in preds1}
        map20 = {p["file_name"]: p for p in preds20}
        common_files = set(map1) & set(map20)

        # Per pattern key: [total, correct@1, correct@20]
        stats = {k: [0, 0, 0] for k in pattern_keys}

        for fname in common_files:
            gt = map1[fname]["gt_tree"]
            pred1 = map1[fname]["pred_tree"]
            pred20 = map20[fname]["pred_tree"]

            nodes = _content_nodes(gt)
            if len(nodes) < 3:
                continue

            pred1_edges = _consecutive_label_pairs(_content_nodes(pred1))
            pred20_edges = _consecutive_label_pairs(_content_nodes(pred20))

            for n1, n2, n3 in zip(nodes, nodes[1:], nodes[2:]):
                b1, b2, b3 = (bbox_edges(n.bbox) for n in (n1, n2, n3))

                # A triplet is correct only when both of its edges are predicted.
                edge_pair = [(n1.label, n2.label), (n2.label, n3.label)]
                is_correct1 = all(e in pred1_edges for e in edge_pair)
                is_correct20 = all(e in pred20_edges for e in edge_pair)

                # Every triplet is counted once on the vertical axis and once on the horizontal axis.
                axis_keys = (
                    _triplet_axis_key(b1, b2, b3, "center_y", "top", "bottom",
                                      "上下:下-上", "上下:上-下", "上下:その他"),
                    _triplet_axis_key(b1, b2, b3, "center_x", "left", "right",
                                      "左右:右-左", "左右:左-右", "左右:その他"),
                )
                for key in axis_keys:
                    stats[key][0] += 1
                    stats[key][1] += int(is_correct1)
                    stats[key][2] += int(is_correct20)

        # --- Format one output row per (backbone, method) ---
        row = {"Backbone": backbone, "Method": method}
        for k in pattern_keys:
            row[k] = _format_triplet_cell(*stats[k])
        records.append(row)

# --- Output ---
df = pd.DataFrame(records)
df = df[["Backbone", "Method", *pattern_keys]]

print("\n=== GTトリプレット方向別 accuracy@20 (+Δ) ===")
print(df.to_string(index=False))

# --- LaTeX output ---
latex_kwargs = dict(
    index=False,
    escape=False,
    column_format="ll" + "c" * len(pattern_keys),
    caption=r"Accuracy@20 per GT reading-order triplet direction, with improvement from Beam 1. Format: acc20 (+Δ).",
    label="tab:gt_triplet_directional_accuracy",
)
latex = df.to_latex(**latex_kwargs)
print("\n=== LaTeX ===")
print(latex)


In [None]:
from collections import defaultdict, Counter
import pandas as pd

# Pattern keys
pattern_keys_v = ["上下:下-上", "上下:上-下", "上下:その他"]
pattern_keys_h = ["左右:右-左", "左右:左-右", "左右:その他"]

# confusions_per_backbone[axis][backbone][gt_pattern][pred_pattern] -> count
confusions_per_backbone = {
    "上下": {b: defaultdict(Counter) for b in backbones},
    "左右": {b: defaultdict(Counter) for b in backbones}
}

method = "DRGGBBoxEmbTFEnc"
beam_width = 20


# The three helpers below are defined once here rather than being re-created
# inside the innermost loop for every triplet.
def center(n):
    """Return the bbox_edges() dict (edges and centre) of node `n`."""
    return bbox_edges(n.bbox)


def classify_vertical(n1, n2, n3):
    """Classify the vertical pattern of a triplet anchored at `n1`.

    Both n2 and n3 are compared against n1: "上下:下-上" when n2's centre lies
    below n1's bottom edge and n3's centre above n1's top edge, "上下:上-下" for
    the mirrored case, "上下:その他" otherwise.

    NOTE(review): the other triplet cells of this notebook compare n3 against
    n2 (not n1) under the same pattern names — confirm which definition is intended.
    """
    b1, b2, b3 = center(n1), center(n2), center(n3)
    dy1 = b2["center_y"] - b1["center_y"]
    dy2 = b3["center_y"] - b1["center_y"]
    if dy1 > 0 and dy2 < 0 and b2["center_y"] > b1["bottom"] and b3["center_y"] < b1["top"]:
        return "上下:下-上"
    elif dy1 < 0 and dy2 > 0 and b2["center_y"] < b1["top"] and b3["center_y"] > b1["bottom"]:
        return "上下:上-下"
    else:
        return "上下:その他"


def classify_horizontal(n1, n2, n3):
    """Classify the horizontal pattern of a triplet anchored at `n1`.

    "左右:右-左" when n2's centre lies right of n1's right edge and n3's centre
    left of n1's left edge, "左右:左-右" for the mirrored case, "左右:その他"
    otherwise. Like classify_vertical, both comparisons are relative to n1.
    """
    b1, b2, b3 = center(n1), center(n2), center(n3)
    dx1 = b2["center_x"] - b1["center_x"]
    dx2 = b3["center_x"] - b1["center_x"]
    if dx1 > 0 and dx2 < 0 and b2["center_x"] > b1["right"] and b3["center_x"] < b1["left"]:
        return "左右:右-左"
    elif dx1 < 0 and dx2 > 0 and b2["center_x"] < b1["left"] and b3["center_x"] > b1["right"]:
        return "左右:左-右"
    else:
        return "左右:その他"


for backbone in backbones:
    key = (backbone, method, beam_width)
    if key not in all_preds_dict:
        print(f"[WARN] Missing: {key}")
        continue

    preds = all_preds_dict[key]

    for entry in preds:
        gt = entry["gt_tree"]
        pred = entry["pred_tree"]

        gt_nodes = [n for n in dfs_all_nodes(gt) if n.category != -1]
        pred_nodes = [n for n in dfs_all_nodes(pred) if n.category != -1]

        if len(gt_nodes) < 3 or len(pred_nodes) < 3:
            continue

        # Position of each label in the predicted order. setdefault keeps the first
        # occurrence, which is what list.index() returned, without a scan per triplet.
        pred_position = {}
        for pos, node in enumerate(pred_nodes):
            pred_position.setdefault(node.label, pos)

        for i in range(len(gt_nodes) - 2):
            n1, n2, n3 = gt_nodes[i], gt_nodes[i+1], gt_nodes[i+2]

            # --- Skip when the anchor n1 is absent from the prediction ---
            idx = pred_position.get(n1.label)
            if idx is None:
                continue
            # --- Skip when fewer than two predicted nodes follow the anchor ---
            if idx + 2 >= len(pred_nodes):
                continue

            n2p, n3p = pred_nodes[idx + 1], pred_nodes[idx + 2]

            # --- Only mispredicted triplets are tallied ---
            if n2p.label == n2.label and n3p.label == n3.label:
                continue

            gt_v = classify_vertical(n1, n2, n3)
            pred_v = classify_vertical(n1, n2p, n3p)
            confusions_per_backbone["上下"][backbone][gt_v][pred_v] += 1

            gt_h = classify_horizontal(n1, n2, n3)
            pred_h = classify_horizontal(n1, n2p, n3p)
            confusions_per_backbone["左右"][backbone][gt_h][pred_h] += 1

# Row-normalise each backbone's confusion matrix and average over backbones.
# A backbone without predictions contributes an all-zero matrix to the mean.
confusion_avg = {}
for axis, pattern_keys in [("上下", pattern_keys_v), ("左右", pattern_keys_h)]:
    normalized_dfs = []
    for backbone in backbones:
        counter = confusions_per_backbone[axis][backbone]
        df = pd.DataFrame.from_dict(counter, orient="index", columns=pattern_keys).fillna(0).astype(int)
        df = df.reindex(index=pattern_keys, columns=pattern_keys)
        df_norm = df.div(df.sum(axis=1), axis=0).fillna(0)
        normalized_dfs.append(df_norm)
    confusion_avg[axis] = sum(normalized_dfs) / len(normalized_dfs)


In [None]:
# Display the raw per-axis, per-backbone confusion counters for inspection.
confusions_per_backbone

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Label definitions: pattern keys as stored in confusion_avg, and their English tick labels
pattern_keys_v = ["上下:下-上", "上下:上-下", "上下:その他"]
pattern_keys_h = ["左右:右-左", "左右:左-右", "左右:その他"]

pattern_keys_v_en = ["V:Down-Up", "V:Up-Down", "V:Other"]
pattern_keys_h_en = ["H:Right-Left", "H:Left-Right", "H:Other"]

# Exactly one entry per key. A repeated key in a dict literal is silently
# overwritten by its last occurrence, which hides mistakes.
ja_to_en = {
    "上下:下-上": "V:Down-Up", "上下:上-下": "V:Up-Down", "上下:その他": "V:Other",
    "左右:右-左": "H:Right-Left", "左右:左-右": "H:Left-Right", "左右:その他": "H:Other",
}

# One heatmap per axis (vertical, horizontal) of the backbone-averaged confusion matrix
for axis, keys_ja, keys_en in [("上下", pattern_keys_v, pattern_keys_v_en), ("左右", pattern_keys_h, pattern_keys_h_en)]:
    df = confusion_avg[axis].copy()
    df.index = [ja_to_en[i] for i in df.index]
    df.columns = [ja_to_en[c] for c in df.columns]

    plt.figure(figsize=(10, 2))
    ax = sns.heatmap(
        df,
        cmap="Blues",
        cbar=True,
        xticklabels=keys_en,
        yticklabels=keys_en,
        # annot=True,
        # fmt=".2f"
    )
    cbar = ax.collections[0].colorbar
    cbar.ax.tick_params(labelsize=14)
    plt.xlabel("Predicted Triplet", fontsize=14)
    plt.ylabel("GT Triplet", fontsize=14)
    plt.xticks(rotation=0, fontsize=18)
    plt.yticks(rotation=0, fontsize=18)
    plt.tight_layout()
    label = "v" if axis == "上下" else "h"
    # NOTE(review): these files are written to the working directory, while the
    # direction heatmap is saved under ./figures — confirm the intended location.
    plt.savefig(f"triplet_{label}_error_direction.png", dpi=300, bbox_inches='tight')
    plt.savefig(f"triplet_{label}_error_direction.eps", format="eps", dpi=300, bbox_inches='tight')
    plt.show()


In [None]:
from math import sqrt
import pandas as pd
import numpy as np

# One record per GT reading-order edge: its normalised length and whether the
# beam-1 / beam-20 prediction contains that edge.
records = []

for backbone in backbones:
    for method in ["DRGG", "DRGGBBoxEmbTFEnc"]:
        key_1 = (backbone, method, 1)
        key_20 = (backbone, method, 20)

        if key_1 not in all_preds_dict or key_20 not in all_preds_dict:
            continue

        preds_1 = all_preds_dict[key_1]
        preds_20 = all_preds_dict[key_20]

        map_1 = {p["file_name"]: p for p in preds_1}
        map_20 = {p["file_name"]: p for p in preds_20}
        common_files = set(map_1) & set(map_20)

        for fname in common_files:
            gt = map_1[fname]["gt_tree"]
            pred1 = map_1[fname]["pred_tree"]
            pred2 = map_20[fname]["pred_tree"]

            nodes = [n for n in dfs_all_nodes(gt) if n.category != -1]
            if len(nodes) < 2:
                continue

            # Traverse each predicted tree once. A consecutive DFS pair counts as an
            # edge only when neither end has category -1.
            # NOTE(review): GT edges are taken after filtering category -1 nodes, the
            # predicted edges before; the two agree only if -1 nodes never sit
            # between two regular nodes — confirm.
            pred1_order = dfs_all_nodes(pred1)
            pred2_order = dfs_all_nodes(pred2)
            pred1_edges = set(
                (a.label, b.label)
                for a, b in zip(pred1_order, pred1_order[1:])
                if a.category != -1 and b.category != -1
            )
            pred2_edges = set(
                (a.label, b.label)
                for a, b in zip(pred2_order, pred2_order[1:])
                if a.category != -1 and b.category != -1
            )

            for i in range(len(nodes) - 1):
                n1, n2 = nodes[i], nodes[i + 1]
                x1, y1 = bbox_center(n1.bbox)
                x2, y2 = bbox_center(n2.bbox)
                dx = abs(x2 - x1)
                dy = abs(y2 - y1)

                # Measure along the dominant axis and normalise by the larger
                # box extent on that axis.
                if dx >= dy:
                    dist = dx
                    scale = max(n1.bbox[2] - n1.bbox[0], n2.bbox[2] - n2.bbox[0])  # width
                else:
                    dist = dy
                    scale = max(n1.bbox[3] - n1.bbox[1], n2.bbox[3] - n2.bbox[1])  # height

                if scale == 0:
                    continue

                norm_dist = dist / scale
                edge = (n1.label, n2.label)
                correct1 = int(edge in pred1_edges)
                correct20 = int(edge in pred2_edges)

                records.append({
                    "Backbone": backbone,
                    "Method": method,
                    "file": fname,
                    "distance": norm_dist,
                    "Acc@1": correct1,
                    "Acc@20": correct20,
                    "Diff": correct20 - correct1
                })

# --- Build the DataFrame ---
df = pd.DataFrame(records)

# --- Binning and aggregation ---
bins = [0, 1, 2, 4, 8, 16, np.inf]
# Raw string: "\i" is not a valid escape sequence in a normal string literal.
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, \infty)"]
# Bins are right-closed, so a distance of exactly 0 falls outside every bin (NaN)
# and is dropped by the groupby below.
df["bin"] = pd.cut(df["distance"], bins=bins, labels=labels)

# observed=True: aggregate only the bins that actually occur. Without it the
# categorical "bin" key yields empty groups whose NaN means render as "nan (+nan)".
grouped = df.groupby(["Backbone", "Method", "bin"], observed=True).agg(
    Count=("Diff", "count"),
    Acc_1=("Acc@1", "mean"),
    Acc_20=("Acc@20", "mean"),
    Diff=("Diff", "mean")
).reset_index()

grouped["Acc@1 (%)"] = (grouped["Acc_1"] * 100).round(1)
grouped["Acc@20 (%)"] = (grouped["Acc_20"] * 100).round(1)
grouped["ΔAccuracy (%)"] = (grouped["Diff"] * 100).round(1)
grouped["acc_str"] = grouped.apply(
    lambda row: f"{row['Acc@20 (%)']:.1f} ({row['ΔAccuracy (%)']:+.1f})", axis=1
)

# --- Pivot (distance bins become columns) ---
df_pivot = grouped.pivot(index=["Backbone", "Method"], columns="bin", values="acc_str").reset_index()
for b in labels:
    if b not in df_pivot.columns:
        df_pivot[b] = ""
# Bins missing for a single (Backbone, Method) pair are shown as empty cells.
df_pivot = df_pivot[["Backbone", "Method"] + labels].fillna("")

# --- Display ---
# print("\n=== 読み順構造における Beam Width の比較（手法固定） ===")
# print(df_pivot.to_string(index=False))

# --- LaTeX output ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="ll" + "c" * len(labels),
    caption=r"Accuracy@20 per reading-order distance bin, comparing beam width 1 vs 20 (fixed method). Format: acc20 (+Δ).",
    label="tab:beam_width_reading_order_fixed_method"
)
# print("\n=== LaTeX ===")
# print(latex)


In [None]:
from statsmodels.stats.contingency_tables import mcnemar

# --- Recompute the per-bin aggregation first ---
# observed=True: only bins that actually occur are aggregated; otherwise the
# categorical "bin" key produces empty groups with NaN means.
grouped = df.groupby(["Backbone", "Method", "bin"], observed=True).agg(
    Count=("Diff", "count"),
    Acc_1=("Acc@1", "mean"),
    Acc_20=("Acc@20", "mean"),
    Diff=("Diff", "mean")
).reset_index()

grouped["Acc@1 (%)"] = (grouped["Acc_1"] * 100).round(1)
grouped["Acc@20 (%)"] = (grouped["Acc_20"] * 100).round(1)
grouped["ΔAccuracy (%)"] = (grouped["Diff"] * 100).round(1)

# --- McNemar test per (Backbone, Method, bin), computed on the per-edge df ---
pvals = []
for (backbone, method, bin_label), g in df.groupby(["Backbone", "Method", "bin"], observed=True):
    hit1, miss1 = g["Acc@1"] == 1, g["Acc@1"] == 0
    hit20, miss20 = g["Acc@20"] == 1, g["Acc@20"] == 0
    # 2x2 table: rows = beam 1 correct / wrong, columns = beam 20 correct / wrong.
    A = int((hit1 & hit20).sum())
    B = int((hit1 & miss20).sum())
    C = int((miss1 & hit20).sum())
    D = int((miss1 & miss20).sum())

    # Fewer than 5 discordant pairs: no p-value is reported for this bin.
    if B + C < 5:
        p_val = None
    else:
        try:
            res = mcnemar([[A, B], [C, D]], exact=True)
            p_val = res.pvalue
        except Exception as e:
            print(f"[WARN] McNemar failed: {backbone}, {method}, {bin_label}: {e}")
            p_val = None

    pvals.append({
        "Backbone": backbone,
        "Method": method,
        "bin": bin_label,
        "p-value": p_val
    })

# --- Merge the p-values and build acc_str (acc@20 in bold when p < 0.05) ---
pval_df = pd.DataFrame(pvals)
grouped = grouped.merge(pval_df, on=["Backbone", "Method", "bin"], how="left")
grouped["acc_str"] = grouped.apply(
    lambda row: (
        f"\\textbf{{{row['Acc@20 (%)']:.1f}}} ({row['ΔAccuracy (%)']:+.1f})"
        # A missing p-value (None or NaN) never compares below 0.05.
        if row["p-value"] is not None and row["p-value"] < 0.05
        else f"{row['Acc@20 (%)']:.1f} ({row['ΔAccuracy (%)']:+.1f})"
    ),
    axis=1
)

# --- Pivot ---
df_pivot = grouped.pivot(index=["Backbone", "Method"], columns="bin", values="acc_str").reset_index()
for b in labels:
    if b not in df_pivot.columns:
        df_pivot[b] = ""
# Bins missing for a single (Backbone, Method) pair are shown as empty cells.
df_pivot = df_pivot[["Backbone", "Method"] + labels].fillna("")

# --- Display ---
# print("\n=== 読み順構造における Beam Width の比較（手法固定, 有意差は太字） ===")
# print(df_pivot.to_string(index=False))

# --- LaTeX output ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="ll" + "c" * len(labels),
    caption=r"Accuracy@20 per reading-order distance bin, comparing beam width 1 vs 20 (fixed method). Values in bold are statistically significant ($p < 0.05$) under McNemar's test.",
    label="tab:beam_width_reading_order_fixed_method"
)
# print("\n=== LaTeX ===")
# print(latex)


In [None]:
def print_mcnemar_significance(grouped, labels):
    """Print one line of significance marks per (Backbone, Method) group.

    Args:
        grouped: DataFrame with the columns "Backbone", "Method", "bin" and "p-value".
        labels: Bin labels, in the order in which they are printed.

    For every bin the line shows "✔" when the first matching row has a p-value
    below 0.05, "✘" when a row exists but is not significant (or has no p-value),
    and "－" when the group has no row for that bin.
    """

    def _mark(rows):
        # No row at all for this bin in the current group.
        if rows.empty:
            return "－"  # not available
        significant = rows.iloc[0]["p-value"] is not None and rows.iloc[0]["p-value"] < 0.05
        return "✔" if significant else "✘"

    print("\n=== 距離ビンごとの有意差 (p < 0.05, McNemar) ===")
    for (backbone, method), subdf in grouped.groupby(["Backbone", "Method"]):
        print(f"[Backbone={backbone} | Method={method}]")
        cells = [f"{_mark(subdf[subdf['bin'] == b])} {b:<10} " for b in labels]
        print("  " + "".join(cells))

# Print the per-bin significance marks for the merged table (needs its "p-value" column).
print_mcnemar_significance(grouped, labels)


In [None]:
import re

# --- Display names for the backbone identifiers ---
backbone_name_map = dict(
    [
        ("r50_4scale", "ResNet-50"),
        ("vitdet_base_4scale", "ViT"),
        ("swin_base_384_4scale", "Swin"),
        ("dit_base", "DiT"),
        ("internimage_base_4scale", "InternImage"),
    ]
)

# --- Keep only the rows whose Method is "DRGGBBoxEmbTFEnc" and show display names ---
is_bebs_method = df_pivot["Method"] == "DRGGBBoxEmbTFEnc"
df_bebs = df_pivot.loc[is_bebs_method].copy()
df_bebs["Backbone"] = df_bebs["Backbone"].map(backbone_name_map)

# --- Replace the \textbf significance markup with a trailing star ---
def remove_textbf_and_add_star(cell):
    """Convert a "\\textbf{acc} (delta)" cell into "acc (delta)$^\\star$".

    Non-string cells are returned untouched. Strings that do not start with the
    bold "value (delta)" pattern only have any \\textbf{...} wrapper removed and
    surrounding whitespace stripped.
    """
    if isinstance(cell, str):
        bold = re.match(r"\\textbf{([\d.]+)}\s+\(([-+.\d]+)\)", cell)
        if bold is None:
            # Plain cell: just unwrap any \textbf{...}
            return re.sub(r"\\textbf{([^}]*)}", r"\1", cell).strip()
        # Bold cell: the star is appended only in this case
        return f"{bold.group(1)} ({bold.group(2)})$^\\star$"
    return cell

# Strip the \textbf markup (adding a star to significant cells) in every bin column.
for col in labels:
    df_bebs[col] = df_bebs[col].apply(remove_textbf_and_add_star)

# --- Row order ---
backbone_order = ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]
df_bebs["Backbone"] = pd.Categorical(df_bebs["Backbone"], categories=backbone_order, ordered=True)
df_bebs = df_bebs.sort_values("Backbone")

# --- LaTeX output ---
lines = []
# One right-aligned column per bin. The spec is derived from `labels` so it can
# never disagree with the header row (it was a fixed six-column "rrrrrr" before).
lines.append(r"\begin{tabular}{l|" + "r" * len(labels) + "}")
lines.append(r"\toprule")
header = r"Backbone & " + " & ".join(labels) + r" \\"
lines.append(header)
lines.append(r"\midrule")

for _, row in df_bebs.iterrows():
    # Missing cells (NaN) are rendered as empty strings.
    row_vals = [row[l] if pd.notna(row[l]) else "" for l in labels]
    lines.append(f"{row['Backbone']:<13} & " + " & ".join(row_vals) + r" \\")

lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")

# Show the table
print("\n".join(lines))

In [None]:
# --- Display-name conversion for the backbone identifiers ---
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage",
}

# --- Output order of rows ---
backbone_order = ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]
decoder_order = ["DRGG-BS", "DRGG-BEBS"]

# --- Add a decoder-name column derived from the method identifier ---
decoder_map = {
    "DRGG": "DRGG-BS",
    "DRGGBBoxEmbTFEnc": "DRGG-BEBS"
}
grouped["Decoder"] = grouped["Method"].map(decoder_map)
grouped["BackboneDisplay"] = grouped["Backbone"].map(backbone_name_map)

# --- Build the formatted cell (a $^\star$ is appended when p < 0.05) ---
# NOTE: this overwrites the "acc_str" column of `grouped`; the new format has no \textbf markup.
grouped["acc_str"] = grouped.apply(
    lambda row: f"{row['Acc@20 (%)']:.1f} ({row['ΔAccuracy (%)']:+.1f})"
    + (r"$^\star$" if pd.notna(row["p-value"]) and row["p-value"] < 0.05 else ""),
    axis=1
)

# --- Pivot: one row per (BackboneDisplay, Decoder), one column per bin ---
# NOTE: `df_pivot` is reassigned here with a different index than the
# (Backbone, Method) pivot built from the same frame.
df_pivot = grouped.pivot(index=["BackboneDisplay", "Decoder"], columns="bin", values="acc_str").reset_index()

# --- Fill in bins that never occurred ---
for b in labels:
    if b not in df_pivot.columns:
        df_pivot[b] = ""
df_pivot = df_pivot[["BackboneDisplay", "Decoder"] + labels]

# --- Control the display order via ordered categoricals ---
df_pivot["BackboneDisplay"] = pd.Categorical(df_pivot["BackboneDisplay"], categories=backbone_order, ordered=True)
df_pivot["Decoder"] = pd.Categorical(df_pivot["Decoder"], categories=decoder_order, ordered=True)
df_pivot = df_pivot.sort_values(["BackboneDisplay", "Decoder"])

# --- LaTeX output ---
lines = []
# The column spec follows the number of bins instead of a hard-coded "rrrrrr".
lines.append(r"\begin{tabular}{ll" + "r" * len(labels) + "}")
lines.append(r"\toprule")
header = r"Backbone & Decoder & " + " & ".join(labels) + r" \\"
lines.append(header)
lines.append(r"\midrule")

emitted_group = False  # True once the first backbone group has been written
for bname in backbone_order:
    sub = df_pivot[df_pivot["BackboneDisplay"] == bname]
    # Decoders that actually have a row for this backbone, in display order.
    present_decoders = [dec for dec in decoder_order if not sub[sub["Decoder"] == dec].empty]
    if not present_decoders:
        continue
    # Separate groups with a rule *between* them, so a missing trailing backbone
    # cannot leave a \midrule directly in front of \bottomrule.
    if emitted_group:
        lines.append(r"\midrule")
    emitted_group = True

    for i, decoder in enumerate(present_decoders):
        row = sub[sub["Decoder"] == decoder]
        # The pivot yields NaN for a bin missing in this row only; render it as an
        # empty cell (a float NaN would make str.join raise TypeError).
        vals = [row.iloc[0][l] if pd.notna(row.iloc[0][l]) else "" for l in labels]
        if i == 0:
            # First row of the group carries the backbone name; the \multirow span
            # equals the number of rows actually emitted for this backbone.
            lines.append(
                rf"\multirow{{{len(present_decoders)}}}{{*}}{{{bname}}} & {decoder} & "
                + " & ".join(vals)
                + r" \\"
            )
        else:
            # Following rows leave the backbone cell empty
            lines.append(rf"    & {decoder} & " + " & ".join(vals) + r" \\")

lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")

# --- Print the table ---
print("\n".join(lines))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# --- Visualise one chosen backbone (example: swin_base_384_4scale) ---
target_backbone = "swin_base_384_4scale"
target_df = grouped.loc[grouped["Backbone"] == target_backbone]

plt.figure(figsize=(10, 5))
# One bar per (bin, Method) showing the accuracy delta column
sns.barplot(data=target_df, x="bin", y="ΔAccuracy (%)", hue="Method", palette="Set2")
plt.xlabel("Normalized Distance Bin (dominant axis)")
plt.ylabel("Accuracy Improvement (%)")
plt.title(f"Accuracy Improvement by Beam Width (Reading Order) — {target_backbone}")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
from collections import defaultdict, Counter
import pandas as pd
import numpy as np

# Distance-bin definition: six half-open intervals over the normalised distance
bins = [0, 1, 2, 4, 8, 16, np.inf]
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Backbones analysed in this cell. Defined BEFORE anything that iterates over it,
# so the cell does not depend on a `backbones` value left over from another cell.
backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]

# Per-backbone confusion counts: GT bin -> predicted bin -> count
bin_confusions_per_backbone = {backbone: defaultdict(Counter) for backbone in backbones}

# Counters used for the misprediction rate
correct_counts_per_backbone = defaultdict(int)
mispred_counts_per_backbone = defaultdict(int)

def truncate_tree_at_node(root: TreeNode, stop_label: str) -> TreeNode | None:
    """Copy the part of a tree that precedes a given node in depth-first order.

    The tree is walked in pre-order (children in list order). Nodes are copied
    until the node whose ``label`` equals ``stop_label`` is reached; that node
    and everything visited after it are left out.

    Args:
        root: Root of the tree to copy.
        stop_label: Label of the node at which copying stops.

    Returns:
        The root of the copied partial tree, or None when ``root`` itself
        carries ``stop_label``.
    """
    reached_stop = [False]  # one-element list so the nested function can flip it

    def copy_prefix(node: TreeNode) -> TreeNode | None:
        if reached_stop[0]:
            return None
        if node.label == stop_label:
            reached_stop[0] = True
            return None

        clone = TreeNode(node.id, node.label, node.bbox, node.category)
        for child in node.children:
            # Once the stop node was found deeper down, later siblings are dropped too.
            if reached_stop[0]:
                break
            sub_copy = copy_prefix(child)
            if sub_copy:
                clone.children.append(sub_copy)
        return clone

    return copy_prefix(root)

# Per-backbone tally for mispredicted successors:
#   [0] GT successor not yet in the tree built before n1 ("selectable")
#   [1] GT successor already placed in that partial tree ("not selectable")
next_status_counts = defaultdict(lambda: [0, 0])

for backbone in backbones:
    # method = "DRGG"
    method = "DRGGBBoxEmbTFEnc"
    key = (backbone, method, 20)

    # Backbones without loaded predictions are skipped silently.
    if key not in all_preds_dict:
        continue

    preds = all_preds_dict[key]
    for entry in preds:
        gt = entry["gt_tree"]
        pred = entry["pred_tree"]

        # Node sequences with category -1 nodes removed
        gt_nodes = [n for n in dfs_all_nodes(gt) if n.category != -1]
        pred_nodes = [n for n in dfs_all_nodes(pred) if n.category != -1]

        # Map each predicted node label to the node that follows it in the predicted sequence
        pred_edges = list(zip(pred_nodes[:-1], pred_nodes[1:]))
        pred_next_map = {n1.label: n2 for n1, n2 in pred_edges}

        for i in range(len(gt_nodes) - 1):
            n1, n2 = gt_nodes[i], gt_nodes[i + 1]

            # GT distance along the dominant axis, normalised by the larger box
            # extent on that axis, then binned
            x1, y1 = bbox_center(n1.bbox)
            x2, y2 = bbox_center(n2.bbox)
            dx, dy = abs(x2 - x1), abs(y2 - y1)
            if dx >= dy:
                dist_gt = dx
                scale = max(n1.bbox[2] - n1.bbox[0], n2.bbox[2] - n2.bbox[0])
            else:
                dist_gt = dy
                scale = max(n1.bbox[3] - n1.bbox[1], n2.bbox[3] - n2.bbox[1])
            if scale == 0:
                continue
            norm_dist_gt = dist_gt / scale
            bin_gt = pd.cut([norm_dist_gt], bins=bins, labels=labels)[0]
            # A distance of exactly 0 falls outside the first (0, 1] interval -> NaN
            if pd.isna(bin_gt):
                continue

            # Predicted successor of n1 (absent when n1 is missing or last in the prediction)
            pred_next = pred_next_map.get(n1.label, None)
            if pred_next is None:
                continue

            if pred_next.label == n2.label:
                correct_counts_per_backbone[backbone] += 1
                continue  # correct successor

            # Same normalised distance, now from n1 to the mispredicted successor
            px, py = bbox_center(pred_next.bbox)
            dxp, dyp = abs(px - x1), abs(py - y1)
            if dxp >= dyp:
                dist_pred = dxp
                scale_p = max(n1.bbox[2] - n1.bbox[0], pred_next.bbox[2] - pred_next.bbox[0])
            else:
                dist_pred = dyp
                scale_p = max(n1.bbox[3] - n1.bbox[1], pred_next.bbox[3] - pred_next.bbox[1])
            if scale_p == 0:
                continue
            norm_dist_pred = dist_pred / scale_p
            bin_pred = pd.cut([norm_dist_pred], bins=bins, labels=labels)[0]
            if pd.isna(bin_pred):
                continue

            # Count the misprediction
            bin_confusions_per_backbone[backbone][bin_gt][bin_pred] += 1
            mispred_counts_per_backbone[backbone] += 1

            # Partial predicted tree as it looked before n1 was placed
            partial_tree = truncate_tree_at_node(pred, n1.label)
            # truncate_tree_at_node returns None when the stop node is the tree root;
            # nothing has been built at that point, so the label set is empty.
            if partial_tree is None:
                all_labels_in_tree = set()
            else:
                all_labels_in_tree = set(n.label for n in dfs_all_nodes(partial_tree))

            if n2.label not in all_labels_in_tree:
                next_status_counts[backbone][0] += 1  # selectable
            else:
                next_status_counts[backbone][1] += 1  # not selectable

# Row-normalise each backbone's confusion matrix, then average the matrices
normalized_dfs = []
for backbone, conf in bin_confusions_per_backbone.items():
    # A backbone without any recorded misprediction (e.g. its predictions were not
    # loaded) would contribute an all-zero matrix and pull the mean towards zero.
    # It is left out, like the per-backbone rate average, which also uses only
    # backbones that have data.
    if not conf:
        continue
    df = pd.DataFrame.from_dict(conf, orient="index", columns=labels).fillna(0).astype(int)
    df = df.reindex(index=labels, columns=labels)
    # Rows without observations divide 0/0 -> NaN and are reset to 0
    df_norm = df.div(df.sum(axis=1), axis=0).fillna(0)
    normalized_dfs.append(df_norm)

# Mean confusion matrix (all zeros when no backbone has data)
if normalized_dfs:
    mean_distance_confusion = sum(normalized_dfs) / len(normalized_dfs)
else:
    mean_distance_confusion = pd.DataFrame(0.0, index=labels, columns=labels)

# Misprediction rate per backbone, followed by the unweighted mean over backbones with data
rates = []
print("\n=== mispredicted距離bin割合（backboneごと）===")
for backbone in backbones:
    correct = correct_counts_per_backbone[backbone]
    mispred = mispred_counts_per_backbone[backbone]
    total = correct + mispred
    if total <= 0:
        print(f"[{backbone}] データなし")
        continue
    rate = 100 * mispred / total
    print(f"[{backbone}] {rate:.2f}%  ({mispred} / {total})")
    rates.append(rate)

if not rates:
    print("\n=== mispredicted距離bin割合を計算できません（全backboneでデータなし） ===")
else:
    print(f"\n=== mispredicted距離bin割合（backbone平均）: {sum(rates)/len(rates):.2f}% ===")

# Share of mispredictions whose GT successor was / was not already placed
print("\n=== 正解次ノードの構築時点の分類（backboneごと） ===")
avg_ratios = [0, 0]
valid_backbones = 0
for backbone, (selectable, not_selectable) in next_status_counts.items():
    total = selectable + not_selectable
    if total != 0:
        r1 = selectable / total * 100
        r2 = not_selectable / total * 100
        print(f"[{backbone}]")
        print(f"  1. 選択可能  : {selectable} ({r1:.2f}%)")
        print(f"  2. 選択不可能: {not_selectable} ({r2:.2f}%)")
        avg_ratios[0] += r1
        avg_ratios[1] += r2
        valid_backbones += 1

if valid_backbones <= 0:
    print("\n=== データなし：backbone 平均は計算不可 ===")
else:
    print("\n=== 分類割合（backbone平均） ===")
    print(f"  1. 選択可能: {avg_ratios[0]/valid_backbones:.2f}%")
    print(f"  2. 選択不可能: {avg_ratios[1]/valid_backbones:.2f}%")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(10, 3.5))
# Rows: GT distance bin, columns: distance bin of the mispredicted box (mean of
# the row-normalised per-backbone matrices). Cell annotations are left off.
ax = sns.heatmap(
    mean_distance_confusion,
    cmap="Blues",
    cbar=True,
    xticklabels=labels,
    yticklabels=labels,
)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)

ax.set_xlabel("Distance of Mispredicted BBox", fontsize=18)
ax.set_ylabel("Distance of GT BBox", fontsize=18)
plt.xticks(rotation=0, fontsize=18)
plt.yticks(rotation=0, fontsize=18)
plt.tight_layout()
# Saved twice: raster PNG and vector EPS
plt.savefig("./figures/reading_order_error_distance.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/reading_order_error_distance.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# ----- Parent-child edge extraction -----
def get_edges(node, skip_root=True):
    """Return the (parent label, child label) pairs below ``node`` in pre-order.

    Args:
        node: Tree node exposing ``label``, ``category`` and ``children``.
        skip_root: When true, edges whose parent has ``category == -1`` are
            left out (their subtrees are still traversed).

    Returns:
        List of ``(parent.label, child.label)`` tuples; each edge appears
        directly before the edges of the child's own subtree.
    """
    edges = []
    # Explicit stack of (parent, child) pairs; children are pushed reversed so
    # they are popped in their original order.
    pending = [(node, child) for child in reversed(node.children)]
    while pending:
        parent, current = pending.pop()
        if not skip_root or parent.category != -1:
            edges.append((parent.label, current.label))
        pending.extend((current, grandchild) for grandchild in reversed(current.children))
    return edges

# ----- Label -> node lookup table -----
def build_node_dict(node):
    """Map every node label in the tree rooted at ``node`` to its node object.

    Nodes are visited in pre-order; when a label occurs more than once the node
    visited last is the one kept for that label.
    """
    nodes = {}
    pending = [node]
    while pending:
        current = pending.pop()
        nodes[current.label] = current
        # Reversed push keeps the children's original visiting order.
        pending.extend(reversed(current.children))
    return nodes

# ----- Direction of a parent -> child edge -----
def edge_direction(parent, child):
    """Classify the offset between the bbox centres of ``parent`` and ``child``.

    Returns one of "左上", "右上", "左下", "右下" or "その他". Offsets with
    dx >= 0 and dy >= 0 (zero included) count as "右下"; the remaining
    axis-aligned cases (dx == 0 with dy < 0, dx < 0 with dy == 0) end up in
    "その他".
    """
    px, py = bbox_center(parent.bbox)
    cx, cy = bbox_center(child.bbox)
    dx = cx - px
    dy = cy - py

    # The four tests are mutually exclusive, so their order does not matter.
    if dx >= 0 and dy >= 0:
        return "右下"
    if dx < 0 and dy < 0:
        return "左上"
    if dx > 0 and dy < 0:
        return "右上"
    if dx < 0 and dy > 0:
        return "左下"
    return "その他"

# ----- Settings -----
# NOTE: `methods` is rebound here to a list of (method A, method B) pairs.
methods = [("DRGG", "DRGGBBoxEmbTFEnc")]
beam_widths = [1, 20]
directions = ["左上", "右上", "左下", "右下"]

# One record per (backbone, method pair, beam width, image)
records = []

for backbone in backbones:
    for method_A, method_B in methods:
        for beam_width in beam_widths:
            key_A = (backbone, method_A, beam_width)
            key_B = (backbone, method_B, beam_width)

            # Both prediction sets are required for a paired comparison
            if key_A not in all_preds_dict or key_B not in all_preds_dict:
                print(f"[WARN] Missing: {key_A} or {key_B}")
                continue

            preds_A = all_preds_dict[key_A]
            preds_B = all_preds_dict[key_B]

            # Index the predictions by file name so the two methods can be paired
            map_A = {p["file_name"]: p for p in preds_A}
            map_B = {p["file_name"]: p for p in preds_B}

            for fname in sorted(set(map_A) & set(map_B)):
                # The GT tree is taken from method A's entry
                gt = map_A[fname]["gt_tree"]
                pred_A = map_A[fname]["pred_tree"]
                pred_B = map_B[fname]["pred_tree"]

                gt_edges = get_edges(gt, skip_root=True)
                pred_edges_A = set(get_edges(pred_A, skip_root=True))
                pred_edges_B = set(get_edges(pred_B, skip_root=True))
                if len(gt_edges) == 0:
                    continue

                # Fraction of GT edges recovered by each method, and the gain of B over A
                correct_A = sum(e in pred_edges_A for e in gt_edges)
                correct_B = sum(e in pred_edges_B for e in gt_edges)
                acc_A = correct_A / len(gt_edges)
                acc_B = correct_B / len(gt_edges)
                acc_diff = acc_B - acc_A

                # Count the GT edges per direction; edges classified outside
                # `directions` ("その他") are not counted
                node_dict = build_node_dict(gt)
                dir_count = Counter()
                for pid, cid in gt_edges:
                    p, c = node_dict[pid], node_dict[cid]
                    d = edge_direction(p, c)
                    if d in directions:
                        dir_count[d] += 1

                # Only directions that occurred become keys of the record
                records.append({
                    "Backbone": backbone,
                    "Method A": method_A,
                    "Method B": method_B,
                    "Beam Width": beam_width,
                    "file_name": fname,
                    "accuracy_diff": acc_diff,
                    **dir_count
                })

# ----- Build the per-image frame and correlate direction counts with the accuracy gain -----
df = pd.DataFrame(records).fillna(0)

# A direction that never occurs in any record has no column at all (the records
# only carry observed directions); add it as zeros so the lookups below cannot
# raise KeyError.
for d in directions:
    if d not in df.columns:
        df[d] = 0

group_cols = ["Backbone", "Method A", "Method B", "Beam Width"]
results = []
if not df.empty:
    for d in directions:
        for (backbone, method_A, method_B, beam_width), grp in df.groupby(group_cols):
            # Pearson's r is undefined for fewer than two samples or when either
            # series is constant (this also covers the "direction never occurs"
            # case, where all counts are 0), so such groups are skipped.
            if len(grp) < 2 or grp[d].nunique() < 2 or grp["accuracy_diff"].nunique() < 2:
                continue
            r, p = pearsonr(grp[d], grp["accuracy_diff"])
            results.append({
                "Backbone": backbone,
                "Method A": method_A,
                "Method B": method_B,
                "Beam Width": beam_width,
                "Direction": d,
                "Pearson r": r,
                "p-value": p
            })

# Explicit columns keep the frame well-formed even when no correlation could be computed
df_corr = pd.DataFrame(results, columns=group_cols + ["Direction", "Pearson r", "p-value"])

# --- Format the correlation as "r†" ---
def format_r_dagger(row):
    """Return r with three decimals, followed by a dagger when p < 0.001."""
    r_val = f"{row['Pearson r']:.3f}"
    return f"{r_val}$\\dagger$" if row["p-value"] < 0.001 else r_val

# A list is used instead of DataFrame.apply so an empty frame yields an empty column
df_corr["r(p)"] = [format_r_dagger(row) for _, row in df_corr.iterrows()]

# --- Pivot & tidy up ---
if df_corr.empty:
    df_pivot = pd.DataFrame(columns=group_cols + directions)
else:
    df_pivot = df_corr.pivot(index=group_cols, columns="Direction", values="r(p)").reset_index()
# Directions without any computed correlation get an empty column; skipped
# (group, direction) combinations become empty cells.
for d in directions:
    if d not in df_pivot.columns:
        df_pivot[d] = ""
df_pivot[directions] = df_pivot[directions].fillna("")
df_pivot["Method A → B"] = df_pivot["Method A"] + " → " + df_pivot["Method B"]
# "Beam Width" stays in the table: without it the beam-width rows of one
# backbone cannot be told apart.
df_pivot = df_pivot[["Backbone", "Method A → B", "Beam Width"] + directions]

# --- LaTeX output ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    caption=r"Correlation between parent-child direction counts and accuracy improvement (Method B − Method A). Pearson $r$ shown; $p < 0.001$ marked with $\dagger$.",
    label="tab:parent_child_direction_compact"
)

print("\n=== 整形済み相関表 ===")
print(df_pivot.to_string(index=False))
print("\n=== LaTeX 出力 ===")
print(latex)

In [None]:
from collections import defaultdict
import pandas as pd
from math import atan2, degrees

# --- Centre of a bbox given as (x_min, y_min, x_max, y_max) ---
def bbox_center(bbox):
    """Return the ``(x, y)`` midpoint of a four-element xyxy bounding box."""
    left, top, right, bottom = bbox
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return center_x, center_y

# --- 8-way direction classification (45-degree sectors) ---
def classify_8_directions(dx, dy):
    """Map an offset ``(dx, dy)`` to one of eight 45-degree direction labels.

    The angle ``atan2(dy, dx)`` is converted to degrees in [0, 360). Positive
    ``dy`` is labelled "下" and positive ``dx`` "右". Each sector is centred on
    a multiple of 45 degrees and includes its lower bound. "不明" is returned
    when the angle matches no sector (e.g. NaN input).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # "右" straddles 0 degrees, so it is the only sector made of two ranges.
    if 337.5 <= angle < 360 or 0 <= angle < 22.5:
        return "右"

    # (sector start, label); every sector spans 45 degrees
    sector_table = (
        (22.5, "右下"),
        (67.5, "下"),
        (112.5, "左下"),
        (157.5, "左"),
        (202.5, "左上"),
        (247.5, "上"),
        (292.5, "右上"),
    )
    for start, name in sector_table:
        if start <= angle < start + 45:
            return name
    return "不明"

# Eight compass directions plus "Root" for edges whose parent has category -1
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上", "Root"]
# One output row per (backbone, method pair, beam width)
records = []

backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
for backbone in backbones:
    for method_A, method_B in [("DRGG", "DRGGBBoxEmbTFEnc")]:
        for beam_width in [1, 20]:
            key_A = (backbone, method_A, beam_width)
            key_B = (backbone, method_B, beam_width)

            # Both prediction sets are needed for the paired comparison
            if key_A not in all_preds_dict or key_B not in all_preds_dict:
                print(f"[WARN] Missing: {key_A} or {key_B}")
                continue

            preds_A = all_preds_dict[key_A]
            preds_B = all_preds_dict[key_B]
            map_A = {p["file_name"]: p for p in preds_A}
            map_B = {p["file_name"]: p for p in preds_B}
            common_files = set(map_A) & set(map_B)

            # --- Initialise the paired counts per direction ---
            # A: both methods correct, B: only method A correct,
            # C: only method B correct, D: both wrong
            stats = {d: {"A": 0, "B": 0, "C": 0, "D": 0} for d in direction_types}

            for fname in common_files:
                gt = map_A[fname]["gt_tree"]
                pred_A = map_A[fname]["pred_tree"]
                pred_B = map_B[fname]["pred_tree"]

                gt_edges = get_edges(gt, skip_root=False)  # edges from the root are included
                pred_edges_A = set(get_edges(pred_A, skip_root=False))
                pred_edges_B = set(get_edges(pred_B, skip_root=False))
                node_dict = build_node_dict(gt)

                for pid, cid in gt_edges:
                    if pid not in node_dict or cid not in node_dict:
                        continue
                    p_node = node_dict[pid]
                    c_node = node_dict[cid]

                    # Edges leaving a category -1 parent are tallied as "Root";
                    # all others by the direction between the bbox centres
                    if p_node.category == -1:
                        direction = "Root"
                    else:
                        x1, y1 = bbox_center(p_node.bbox)
                        x2, y2 = bbox_center(c_node.bbox)
                        dx, dy = x2 - x1, y2 - y1
                        direction = classify_8_directions(dx, dy)

                    if direction not in direction_types:
                        continue

                    # A GT edge counts as correct for a method when the same
                    # (parent label, child label) pair is in its predicted edge set
                    edge = (pid, cid)
                    correct_A = edge in pred_edges_A
                    correct_B = edge in pred_edges_B

                    if correct_A and correct_B:
                        stats[direction]["A"] += 1
                    elif correct_A and not correct_B:
                        stats[direction]["B"] += 1
                    elif not correct_A and correct_B:
                        stats[direction]["C"] += 1
                    else:
                        stats[direction]["D"] += 1


            row = {
                "Backbone": backbone,
                "Method A → B": f"{method_A} → {method_B}",
                "Beam Width": beam_width
            }
            for d in direction_types:
                A = stats[d]["A"]
                B = stats[d]["B"]
                C = stats[d]["C"]
                D = stats[d]["D"]
                total = A + B + C + D

                if total == 0:
                    # No GT edge in this direction: empty cell
                    row[d] = ""
                else:
                    # Accuracy of each method in percent and the gain of B over A
                    acc_A = (A + B) / total * 100
                    acc_B = (A + C) / total * 100
                    delta = acc_B - acc_A

                    # McNemar's test is only run with at least 5 discordant pairs
                    try:
                        if B + C >= 5:
                            result = mcnemar([[A, B], [C, D]], exact=True)
                            p = result.pvalue
                        else:
                            p = None
                    except Exception as e:
                        print(f"[WARN] McNemar failed: {backbone}, {method_A}→{method_B}, {d}: {e}")
                        p = None

                    # Method B accuracy with the signed delta; bold when p < 0.05
                    acc_str = (
                        f"\\textbf{{{acc_B:.1f}}} ({delta:+.1f})"
                        if p is not None and p < 0.05
                        else f"{acc_B:.1f} ({delta:+.1f})"
                    )
                    row[d] = acc_str
            records.append(row)

# --- Assemble the result frame with a fixed column order ---
df = pd.DataFrame(records)[["Backbone", "Method A → B", "Beam Width"] + direction_types]

# --- LaTeX output (escape=False keeps the \textbf markup intact) ---
latex = df.to_latex(
    index=False,
    escape=False,
    column_format="lll" + "c" * len(direction_types),
    caption=r"Accuracy@20 per GT parent-child edge direction (8 directions), with improvement from Method A. Format: acc20 (+Δ).",
    label="tab:gt_edge_direction_accuracy",
)

In [None]:
def print_directional_significance(df: pd.DataFrame, direction_types):
    """Print one line of significance marks per row of the direction table.

    Args:
        df: Frame with "Backbone", "Method A → B", "Beam Width" and one column
            per entry of ``direction_types``.
        direction_types: Direction column names, in print order.

    A cell starting with ``\\textbf{`` is shown as "✔", any other non-empty
    string as "✘", and everything else (empty string, non-string) as "－".
    """

    def _mark(val):
        if not isinstance(val, str) or val == "":
            return "－"
        return "✔" if val.startswith("\\textbf{") else "✘"

    print("\n=== 方向別有意差 (p < 0.05, McNemar) ===")
    for _, row in df.iterrows():
        heading = f"[Backbone={row['Backbone']} | Method={row['Method A → B']} | Beam Width={row['Beam Width']}]"
        print(heading)
        cells = [f"{_mark(row[d])} {d:<6} " for d in direction_types]
        print("  " + "".join(cells))

# Print the marks for the direction table; cells must still carry their \textbf markup here.
print_directional_significance(df, direction_types)

In [None]:
# --- Backbone display-name map ---
backbone_name_map = dict(
    [
        ("r50_4scale", "ResNet-50"),
        ("vitdet_base_4scale", "ViT"),
        ("swin_base_384_4scale", "Swin"),
        ("dit_base", "DiT"),
        ("internimage_base_4scale", "InternImage"),
    ]
)

# --- English labels for the directions, in display order ("Root" is left out) ---
direction_display = dict(
    zip(
        ("右", "右下", "下", "左下", "左", "左上", "上", "右上"),
        ("Right", "Bottom-Right", "Bottom", "Bottom-Left", "Left", "Top-Left", "Top", "Top-Right"),
    )
)
ordered_dirs = [*direction_display]

# Turn the \textbf significance markup into a trailing $^\star$
def convert_textbf_to_star(cell):
    """Rewrite "\\textbf{acc} (delta)" as "acc (delta)$^\\star$".

    Non-string cells pass through unchanged. Strings not starting with the bold
    pattern (exactly one space before the parenthesis) only lose any
    \\textbf{...} wrapper and surrounding whitespace.
    """
    if isinstance(cell, str):
        bold = re.match(r"\\textbf{([\d.]+)} \(([-+.\d]+)\)", cell)
        if bold is None:
            return re.sub(r"\\textbf{([^}]*)}", r"\1", cell).strip()
        return f"{bold.group(1)} ({bold.group(2)})$^\\star$"
    return cell

# --- Keep only the DRGG → DRGGBBoxEmbTFEnc rows at beam width 20 ---
is_bebs_bw20 = (df["Method A → B"] == "DRGG → DRGGBBoxEmbTFEnc") & (df["Beam Width"] == 20)
df_bebs = df[is_bebs_bw20].copy()
df_bebs["BackboneName"] = df_bebs["Backbone"].map(backbone_name_map)

# --- LaTeX output ---
header = (
    r"\multicolumn{1}{l}{Backbone} & " +
    " & ".join([rf"\multicolumn{{1}}{{l}}{{{direction_display[d]}}}" for d in ordered_dirs]) +
    r" \\"
)
lines = [r"\begin{tabular}{l|rrrrrrrr}", r"\toprule", header, r"\midrule"]

for b in ["ResNet-50", "ViT", "Swin", "DiT", "InternImage"]:
    row = df_bebs[df_bebs["BackboneName"] == b]
    if row.empty:
        continue
    first = row.iloc[0]  # the first matching row supplies the cells
    vals = [convert_textbf_to_star(first[d]) if d in row.columns else "" for d in ordered_dirs]
    lines.append(f"{b:<15} & " + " & ".join(vals) + r" \\")

lines.extend([r"\bottomrule", r"\end{tabular}"])

# --- Print the table ---
print("\n".join(lines))


In [None]:
import re
from itertools import groupby

# Display maps
backbone_name_map = dict(
    [
        ("r50_4scale", "ResNet-50"),
        ("vitdet_base_4scale", "ViT"),
        ("swin_base_384_4scale", "Swin"),
        ("dit_base", "DiT"),
        ("internimage_base_4scale", "InternImage"),
    ]
)
direction_display = dict(
    [
        ("右", "Right"),
        ("右下", "Bottom-Right"),
        ("下", "Bottom"),
        ("左下", "Bottom-Left"),
        ("左", "Left"),
        ("左上", "Top-Left"),
        ("上", "Top"),
        ("右上", "Top-Right"),
    ]
)
# Column order = insertion order of the direction map
ordered_dirs = [*direction_display]
# Row order = insertion order of the backbone map's display names
backbone_order = [*backbone_name_map.values()]
# Beam width -> decoder name shown in the table
decoder_map = dict([(1, "DRGG-BE"), (20, "DRGG-BEBS")])

# Turn the \textbf significance markup into a trailing $^\star$
def convert_textbf_to_star(cell):
    """Rewrite "\\textbf{acc} (delta)" as "acc (delta)$^\\star$".

    Anything that is not a string is returned as is; other strings only have a
    \\textbf{...} wrapper removed and are stripped of surrounding whitespace.
    """
    if not isinstance(cell, str):
        return cell
    bold = re.match(r"\\textbf{([\d.]+)} \(([-+.\d]+)\)", cell)
    if bold is not None:
        value, delta = bold.group(1), bold.group(2)
        return f"{value} ({delta})$^\\star$"
    unwrapped = re.sub(r"\\textbf{([^}]*)}", r"\1", cell)
    return unwrapped.strip()

# Preprocessing is done on a copy: stripping the \textbf markup from the shared
# `df` in place would make the cell non-idempotent and would silently turn every
# significance mark that is derived from that markup into "not significant" when
# an earlier cell is re-run.
df_display = df.copy()
df_display["BackboneDisplay"] = df_display["Backbone"].map(backbone_name_map)
for d in ordered_dirs:
    if d in df_display.columns:
        df_display[d] = df_display[d].apply(convert_textbf_to_star)

# Build one record per (backbone display name, beam width) that has data
records = []
for backbone in backbone_order:
    for beam_width in [1, 20]:
        decoder = decoder_map[beam_width]
        row = df_display[
            (df_display["BackboneDisplay"] == backbone) &
            (df_display["Method A → B"] == "DRGG → DRGGBBoxEmbTFEnc") &
            (df_display["Beam Width"] == beam_width)
        ]
        if row.empty:
            continue
        rec = {"Backbone": backbone, "Decoder": decoder}
        # Cells are taken from the first matching row
        for d in ordered_dirs:
            rec[d] = row.iloc[0][d]
        records.append(rec)

# LaTeX output
lines = []
# One right-aligned column per direction, derived from `ordered_dirs`
lines.append(r"\begin{tabular}{ll|" + "r" * len(ordered_dirs) + "}")
lines.append(r"\toprule")
header = (
    r"\multicolumn{1}{l}{Backbone} & \multicolumn{1}{l}{Decoder} & " +
    " & ".join([rf"\multicolumn{{1}}{{l}}{{{direction_display[d]}}}" for d in ordered_dirs]) +
    r" \\"
)
lines.append(header)
lines.append(r"\midrule")

# `records` is ordered by backbone, so consecutive grouping yields one group per backbone
for group_idx, (backbone, group) in enumerate(groupby(records, key=lambda r: r["Backbone"])):
    group = list(group)
    # Rule between groups only; this cannot leave a stray \midrule in front of
    # \bottomrule when the last backbone of the order has no data.
    if group_idx > 0:
        lines.append(r"\midrule")
    for i, row in enumerate(group):
        vals = [row[d] for d in ordered_dirs]
        if i == 0:
            # The \multirow span matches the number of rows actually present
            lines.append(
                rf"\multirow{{{len(group)}}}{{*}}{{{backbone}}} & {row['Decoder']} & "
                + " & ".join(vals)
                + r" \\"
            )
        else:
            lines.append(rf"    & {row['Decoder']} & " + " & ".join(vals) + r" \\")
lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")

# Print the table
print("\n".join(lines))


In [None]:
# Direction types (including "Root")
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上", "Root"]

# Japanese -> English direction labels ("Root" maps to itself)
jp_to_en = dict(
    zip(
        direction_types,
        (
            "Right",
            "Bottom-Right",
            "Bottom",
            "Bottom-Left",
            "Left",
            "Top-Left",
            "Top",
            "Top-Right",
            "Root",
        ),
    )
)
direction_types_en = list(map(jp_to_en.__getitem__, direction_types))

# --- Confusion counts per backbone: GT direction -> predicted direction -> count ---
confusions_per_backbone = dict((b, defaultdict(Counter)) for b in backbones)

# Configuration analysed in this cell
method, beam_width = "DRGGBBoxEmbTFEnc", 20

def truncate_tree_at_node(root: TreeNode, stop_label: str) -> TreeNode | None:
    """Return a copy of the tree cut off at the node labelled ``stop_label``.

    Nodes are visited depth-first in pre-order. Every node visited before the
    stop node is copied; the stop node and all nodes visited after it are
    omitted. None is returned when ``root`` itself is the stop node.
    """
    state = {"done": False}  # mutable holder shared with the nested walker

    def walk(node: TreeNode) -> TreeNode | None:
        if state["done"]:
            return None
        if node.label == stop_label:
            state["done"] = True
            return None

        duplicate = TreeNode(node.id, node.label, node.bbox, node.category)
        for child in node.children:
            # Siblings after the stop point are not visited at all
            if state["done"]:
                break
            child_duplicate = walk(child)
            if child_duplicate:
                duplicate.children.append(child_duplicate)
        return duplicate

    return walk(root)

def get_rightmost_path(root):
    """Return the ``id`` values along the path that always follows the last child.

    The walk starts at ``root`` and ends at the first node without children.
    A falsy ``root`` gives an empty list.
    """
    path = []
    node = root
    while node:
        path.append(node.id)
        if not node.children:
            break
        node = node.children[-1]
    return path

correct_counts_per_backbone = defaultdict(int)
mispred_counts_per_backbone = defaultdict(int)
# Per backbone, where the GT parent was when the child got attached:
#   [0] on the rightmost path, [1] elsewhere in the built tree, [2] not built yet
parent_status_counts = defaultdict(lambda: [0, 0, 0])

for backbone in backbones:
    key = (backbone, method, beam_width)
    if key not in all_preds_dict:
        print(f"[WARN] Missing: {key}")
        continue

    preds = all_preds_dict[key]
    for entry in preds:
        gt = entry["gt_tree"]
        pred = entry["pred_tree"]

        gt_edges = get_edges(gt, skip_root=False)
        pred_edges = get_edges(pred, skip_root=False)
        gt_nodes = build_node_dict(gt)
        pred_nodes = build_node_dict(pred)

        # Direction of every predicted edge, plus a child-label index so the
        # predicted direction of a child is a dict lookup instead of a scan
        # over all predicted edges for every mispredicted GT edge.
        pred_edge_dirs = {}
        pred_dir_by_child = {}
        for pid, cid in pred_edges:
            if pid in pred_nodes and cid in pred_nodes:
                if pred_nodes[pid].category == -1 or pred_nodes[cid].category == -1:
                    dir_ = "Root"
                else:
                    x1, y1 = bbox_center(pred_nodes[pid].bbox)
                    x2, y2 = bbox_center(pred_nodes[cid].bbox)
                    dir_ = classify_8_directions(x2 - x1, y2 - y1)
                pred_edge_dirs[(pid, cid)] = dir_
                # setdefault keeps the first predicted edge found for a child
                pred_dir_by_child.setdefault(cid, dir_)

        for pid, cid in gt_edges:
            if pid not in gt_nodes or cid not in gt_nodes:
                continue

            if gt_nodes[pid].category == -1 or gt_nodes[cid].category == -1:
                gt_dir = "Root"
            else:
                x1, y1 = bbox_center(gt_nodes[pid].bbox)
                x2, y2 = bbox_center(gt_nodes[cid].bbox)
                gt_dir = classify_8_directions(x2 - x1, y2 - y1)

            if gt_dir not in direction_types:
                continue

            # Correctly predicted edge: count it and move on
            if (pid, cid) in pred_edge_dirs:
                correct_counts_per_backbone[backbone] += 1
                continue

            # Direction of the predicted edge that attaches the child `cid`;
            # GT edges whose child has no predicted edge are not counted at all
            pred_dir = pred_dir_by_child.get(cid)

            if pred_dir not in direction_types:
                continue

            confusions_per_backbone[backbone][gt_dir][pred_dir] += 1
            mispred_counts_per_backbone[backbone] += 1

            # Predicted tree as it looked before the child was attached
            partial_tree = truncate_tree_at_node(pred, pred_nodes[cid].label)
            if not partial_tree:
                parent_status_counts[backbone][2] += 1
                continue

            # Labels on the rightmost path of the partial tree. They are collected
            # as labels because the GT parent below is looked up by its label;
            # get_rightmost_path() returns node ids, which only match when ids and
            # labels happen to coincide.
            rightmost_path = set()
            frontier_node = partial_tree
            while frontier_node is not None:
                rightmost_path.add(frontier_node.label)
                frontier_node = frontier_node.children[-1] if frontier_node.children else None
            all_labels_in_tree = set(n.label for n in dfs_all_nodes(partial_tree))

            if gt_nodes[pid].label in rightmost_path:
                parent_status_counts[backbone][0] += 1  # GT parent was on the right frontier
            elif gt_nodes[pid].label in all_labels_in_tree:
                parent_status_counts[backbone][1] += 1  # already built, but off the frontier
            else:
                parent_status_counts[backbone][2] += 1  # not built yet (reading-order conflict)

# Row-normalise each backbone's direction confusion matrix, then average them
normalized_dfs = []
for backbone, confusion in confusions_per_backbone.items():
    # Backbones without recorded mispredictions (e.g. skipped because their
    # predictions are missing) would add an all-zero matrix and drag the mean
    # towards zero, so they are excluded from the average.
    if not confusion:
        continue
    df = pd.DataFrame.from_dict(confusion, orient="index", columns=direction_types).fillna(0).astype(int)
    df = df.reindex(index=direction_types, columns=direction_types).fillna(0).astype(int)
    # Rows without observations divide 0/0 -> NaN and are reset to 0
    df_norm = df.div(df.sum(axis=1), axis=0).fillna(0)
    # Switch both axes to the English labels
    df_norm.index = [jp_to_en[d] for d in df_norm.index]
    df_norm.columns = [jp_to_en[d] for d in df_norm.columns]
    normalized_dfs.append(df_norm)

# Mean confusion matrix (all zeros when no backbone has data)
if normalized_dfs:
    mean_confusion = sum(normalized_dfs) / len(normalized_dfs)
else:
    english_labels = [jp_to_en[d] for d in direction_types]
    mean_confusion = pd.DataFrame(0.0, index=english_labels, columns=english_labels)

# Misprediction rate per backbone, then the unweighted mean over backbones with data
print("\n=== mispredicted方向割合（backboneごと）===")
mispred_rates = []
for backbone in backbones:
    correct = correct_counts_per_backbone[backbone]
    mispred = mispred_counts_per_backbone[backbone]
    total = correct + mispred
    if total <= 0:
        print(f"[{backbone}] データなし")
        continue
    rate = 100 * mispred / total
    print(f"[{backbone}] {rate:.2f}%  ({mispred} / {total})")
    mispred_rates.append(rate)

if not mispred_rates:
    print("\n=== mispredicted方向割合を計算できません（全backboneでデータなし） ===")
else:
    print(f"\n=== mispredicted方向割合（backbone平均）: {sum(mispred_rates)/len(mispred_rates):.2f}% ===")

# Where the GT parent was at attachment time, per backbone and averaged
print("\n=== 正解親の構築時点の分類（backboneごと） ===")
avg_ratios = [0, 0, 0]  # [in_frontier, in_tree, not_built]
valid_backbones = 0
for backbone, (in_frontier, in_tree, not_built) in parent_status_counts.items():
    total = in_frontier + in_tree + not_built
    if total != 0:
        r1 = in_frontier / total * 100
        r2 = in_tree / total * 100
        r3 = not_built / total * 100
        print(f"[{backbone}]")
        print(f"  1. rightmost path に存在       : {in_frontier} ({r1:.2f}%)")
        print(f"  2. rightmost path 以外の木に存在: {in_tree} ({r2:.2f}%)")
        print(f"  3. 正解の親が子より後ろの読み順  : {not_built} ({r3:.2f}%)")
        avg_ratios[0] += r1
        avg_ratios[1] += r2
        avg_ratios[2] += r3
        valid_backbones += 1

if valid_backbones <= 0:
    print("\n=== データなし：backbone 平均は計算不可 ===")
else:
    print("\n=== 分類割合（backbone平均） ===")
    print(f"  1. rightmost path に存在       : {avg_ratios[0]/valid_backbones:.2f}%")
    print(f"  2. rightmost path 以外の木に存在: {avg_ratios[1]/valid_backbones:.2f}%")
    print(f"  3. 正解の親が子より後ろの読み順  : {avg_ratios[2]/valid_backbones:.2f}%")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the backbone-averaged direction confusion table.
plt.figure(figsize=(10, 4))
heatmap_options = dict(
    cmap="Blues",
    cbar=True,
    xticklabels=direction_types_en,
    yticklabels=direction_types_en,
)
ax = sns.heatmap(mean_confusion, **heatmap_options)

# The colorbar is attached to the first collection drawn by the heatmap.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)

plt.xlabel("Direction of Mispredicted BBox", fontsize=18)
plt.ylabel("Direction of GT BBox", fontsize=18)
plt.xticks(rotation=0, fontsize=10)
plt.yticks(rotation=0, fontsize=18)
plt.tight_layout()

# Save a raster and a vector version of the same figure.
for figure_path, save_options in (
    ("./figures/parent_child_error_direction.png", {}),
    ("./figures/parent_child_error_direction.eps", {"format": "eps"}),
):
    plt.savefig(figure_path, dpi=300, bbox_inches="tight", **save_options)
plt.show()


In [None]:
# Build one record per ground-truth edge so that method A and method B can be
# compared edge by edge on the pages for which both have predictions.
records = []

for backbone in backbones:
    for method_A, method_B in [("DRGG", "DRGGBBoxEmbTFEnc")]:
        for beam_width in [1, 20]:
            key_A = (backbone, method_A, beam_width)
            key_B = (backbone, method_B, beam_width)

            # Skip configurations for which either prediction set was not loaded.
            if key_A not in all_preds_dict or key_B not in all_preds_dict:
                continue

            preds_A = all_preds_dict[key_A]
            preds_B = all_preds_dict[key_B]

            map_A = {p["file_name"]: p for p in preds_A}
            map_B = {p["file_name"]: p for p in preds_B}
            common_files = set(map_A) & set(map_B)

            # Iterate in sorted order: the iteration order of a set of strings changes
            # between kernel sessions, which would make the order of `records`
            # irreproducible.
            for fname in sorted(common_files):
                seen = set()
                gt = map_A[fname]["gt_tree"]
                pred_A = map_A[fname]["pred_tree"]
                pred_B = map_B[fname]["pred_tree"]

                node_dict = build_node_dict(gt)
                gt_edges = get_edges(gt, skip_root=False)  # edges from the root are included
                pred_edges_A = set((e[0], e[1]) for e in get_edges(pred_A, skip_root=False))
                pred_edges_B = set((e[0], e[1]) for e in get_edges(pred_B, skip_root=False))

                for pid, cid in gt_edges:
                    # Count every (file, parent, child) edge at most once.
                    edge_id = (fname, pid, cid)
                    if edge_id in seen:
                        continue
                    seen.add(edge_id)

                    if pid not in node_dict or cid not in node_dict:
                        continue

                    parent = node_dict[pid]
                    child = node_dict[cid]

                    # --- Edges touching a category -1 node get the "Root" marker ---
                    if parent.category == -1 or child.category == -1:
                        bin_label = "Root"
                    else:
                        x1, y1 = bbox_center(parent.bbox)
                        x2, y2 = bbox_center(child.bbox)
                        dx = abs(x2 - x1)
                        dy = abs(y2 - y1)

                        # Normalize the offset along the dominant axis by the larger
                        # box extent on that same axis.
                        if dx >= dy:
                            scale = max(parent.bbox[2] - parent.bbox[0], child.bbox[2] - child.bbox[0])
                            dist = dx
                        else:
                            scale = max(parent.bbox[3] - parent.bbox[1], child.bbox[3] - child.bbox[1])
                            dist = dy

                        if scale <= 0:
                            continue  # skip edges with an invalid (non-positive) scale

                        norm_dist = dist / scale
                        bin_label = norm_dist

                    # 1 when the method predicted exactly this (parent, child) edge.
                    correct_A = int((pid, cid) in pred_edges_A)
                    correct_B = int((pid, cid) in pred_edges_B)

                    records.append({
                        "Backbone": backbone,
                        "Method A → B": f"{method_A} → {method_B}",
                        "Beam Width": beam_width,
                        "file": fname,
                        "distance": bin_label,  # float or the string "Root"
                        "Acc A": correct_A,
                        "Acc B": correct_B,
                        "Diff": correct_B - correct_A
                    })


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# --- Build the per-edge DataFrame ---
df_dist = pd.DataFrame(records)

# --- Bin definition ---
bins = [0, 1, 2, 4, 8, 16, np.inf]
# Raw string for the LaTeX label: "\i" is not a valid Python escape sequence and
# raises a SyntaxWarning in a normal string literal.
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, \infty)", "Root"]

# --- Bin assignment ---
df_dist["bin"] = None
mask_numeric = df_dist["distance"] != "Root"
# "distance" mixes floats with the "Root" marker (object dtype), so the numeric
# part is cast explicitly before binning.
numeric_distance = df_dist.loc[mask_numeric, "distance"].astype(float)
df_dist.loc[mask_numeric, "bin"] = pd.cut(
    numeric_distance, bins=bins, labels=labels[:-1]
).astype(object)
df_dist.loc[~mask_numeric, "bin"] = "Root"

# --- Aggregation ---
grouped = df_dist.groupby(["Backbone", "Method A → B", "Beam Width", "bin"]).agg(
    Count=("Diff", "count"),
    Acc_A=("Acc A", "mean"),
    Acc_B=("Acc B", "mean"),
    Diff=("Diff", "mean")
).reset_index()

# --- Percentage columns ---
grouped["Acc A (%)"] = (grouped["Acc_A"] * 100).round(2)
grouped["Acc B (%)"] = (grouped["Acc_B"] * 100).round(2)
grouped["Diff (%)"] = (grouped["Diff"] * 100).round(2)

# --- Output ---
# print(grouped.to_string(index=False))

# --- Visualization (one backbone, "Root" included) ---
# .copy() so that adding the ordered categorical below modifies an independent
# frame instead of a slice of `grouped` (avoids SettingWithCopyWarning).
target_df = grouped[grouped["Backbone"] == "vitdet_base_4scale"].copy()
target_df["bin"] = pd.Categorical(target_df["bin"], categories=labels, ordered=True)

plt.figure(figsize=(10, 5))
sns.barplot(data=target_df, x="bin", y="Diff (%)", hue="Beam Width")
plt.title("Accuracy Improvement per Distance Bin (vitdet_base_4scale)")
plt.xlabel("Normalized Distance Bin")
plt.ylabel("Accuracy Improvement (%)")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
# --- Format each cell as "acc (+Δ)" ---
grouped["acc_str"] = grouped.apply(
    lambda row: f"{row['Acc B (%)']:.1f} ({row['Diff (%)']:+.1f})", axis=1
)

# --- Pivot: spread the distance bins over the columns ---
# Raw string for the LaTeX label: "\i" is not a valid Python escape sequence and
# raises a SyntaxWarning in a normal string literal.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, \infty)", "Root"]
df_pivot = grouped.pivot(
    index=["Backbone", "Method A → B", "Beam Width"],
    columns="bin",
    values="acc_str"
).reset_index()

# --- Fix the column order; bins without any data become empty columns ---
for b in bin_labels:
    if b not in df_pivot.columns:
        df_pivot[b] = ""
df_pivot = df_pivot[["Backbone", "Method A → B", "Beam Width"] + bin_labels]

# --- LaTeX output ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="lll" + "c" * len(bin_labels),
    caption=r"Accuracy@20 per parent-child vertical distance bin, with improvement from Method A. Format: acc20 (+Δ).",
    label="tab:distance_bin_accuracy"
)
# print("\n=== LaTeX ===")
# print(latex)


In [None]:
import numpy as np
from statsmodels.stats.contingency_tables import mcnemar

# Fresh per-edge frame built from `records`; the distances are binned again below via assign_bin.
df = pd.DataFrame(records)

# --- Binning (numeric distances + "Root") ---
# Edges of the distance intervals used by assign_bin.
bins = [0, 1, 2, 4, 8, 16, np.inf]
# One label per consecutive pair of edges in `bins`; "Root" is appended later when bin_order is built.
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", "(16, ∞)"]

def assign_bin(val):
    """Map a distance value to its bin label.

    The marker string "Root" maps to itself.  Any other value is compared against
    each consecutive pair of edges in the module-level ``bins`` with a
    ``lower < val <= upper`` test, and the entry of ``labels`` at the matching
    position is returned.  ``None`` is returned when no interval contains the value.
    """
    if val == "Root":
        return "Root"
    for position, (lower, upper) in enumerate(zip(bins, bins[1:])):
        if lower < val <= upper:
            return labels[position]
    return None

# Attach the bin label to every edge and drop edges that fall in no bin.
df["bin"] = df["distance"].apply(assign_bin)
df = df.dropna(subset=["bin"])

# --- McNemar test and aggregation per (backbone, method pair, beam width, bin) ---
agg_records = []
group_keys = ["Backbone", "Method A → B", "Beam Width", "bin"]

for (backbone, method_pair, beam_width, bin_label), g in df.groupby(group_keys):
    hit_A, miss_A = g["Acc A"] == 1, g["Acc A"] == 0
    hit_B, miss_B = g["Acc B"] == 1, g["Acc B"] == 0

    # 2x2 contingency counts: rows = method A right/wrong, columns = method B right/wrong.
    A = int((hit_A & hit_B).sum())
    B = int((hit_A & miss_B).sum())
    C = int((miss_A & hit_B).sum())
    D = int((miss_A & miss_B).sum())

    total = A + B + C + D
    if total > 0:
        acc_A = (A + B) / total
        acc_B = (A + C) / total
    else:
        acc_A = 0
        acc_B = 0
    delta = acc_B - acc_A

    # The test is only run when there are at least five discordant pairs.
    pval = None
    if B + C >= 5:
        try:
            pval = mcnemar([[A, B], [C, D]], exact=True).pvalue
        except Exception as e:
            print(f"[WARN] McNemar failed: {backbone}, {method_pair}, bin={bin_label}: {e}")

    # Accuracy of B is set in bold when the difference is significant at 0.05.
    acc_text = f"{acc_B * 100:.1f}"
    delta_text = f"({delta * 100:+.1f})"
    if pval is not None and pval < 0.05:
        acc_str = f"\\textbf{{{acc_text}}} {delta_text}"
    else:
        acc_str = f"{acc_text} {delta_text}"

    agg_records.append({
        "Backbone": backbone,
        "Method A → B": method_pair,
        "Beam Width": beam_width,
        "Bin": bin_label,
        "Acc A": round(acc_A * 100, 1),
        "Acc B": round(acc_B * 100, 1),
        "ΔAcc": round(delta * 100, 1),
        "p-value": pval,
        "acc_str": acc_str
    })

# --- Build the result frame and pivot the bins into columns ---
df_result = pd.DataFrame(agg_records)
bin_order = labels + ["Root"]
id_columns = ["Backbone", "Method A → B", "Beam Width"]
df_pivot = df_result.pivot(index=id_columns, columns="Bin", values="acc_str").reset_index()
# Bins that never occurred still get an (empty) column so the layout is fixed.
for b in bin_order:
    if b not in df_pivot.columns:
        df_pivot[b] = ""
df_pivot = df_pivot[id_columns + bin_order]

# --- Display ---
# print("\n=== エッジ距離ビン別 Accuracy@B (+Δ) [p < 0.05 は太字] ===")
# print(df_pivot.to_string(index=False))

# --- LaTeX output ---
latex = df_pivot.to_latex(
    index=False,
    escape=False,
    column_format="lll" + "c" * len(bin_order),
    caption=r"Accuracy@B per GT edge distance bin, comparing method A to B. Values in bold are statistically significant ($p < 0.05$) under McNemar's test.",
    label="tab:gt_edge_distance_accuracy"
)
# print("\n=== LaTeX ===")
# print(latex)


In [None]:
def print_bin_significance(df_result, bin_order):
    """Print one line of significance marks per configuration.

    For every (Backbone, Method A → B, Beam Width) group of ``df_result`` a header
    line is printed, followed by a line holding one mark per label in
    ``bin_order``: "✔" when the first row for that bin has a p-value below 0.05,
    "－" when the group has no row for that bin, and "✘" otherwise.
    """
    print("\n=== 距離ビンごとの有意差 (p < 0.05, McNemar) ===")
    config_cols = ["Backbone", "Method A → B", "Beam Width"]
    for (backbone, method_pair, beam_width), subdf in df_result.groupby(config_cols):
        print(f"[Backbone={backbone} | {method_pair} | Beam={beam_width}]")
        cells = []
        for b in bin_order:
            pvals = subdf.loc[subdf["Bin"] == b, "p-value"]
            if pvals.empty:
                mark = "－"
            else:
                first = pvals.iloc[0]
                mark = "✔" if (first is not None and first < 0.05) else "✘"
            cells.append(f"{mark} {b:<10} ")
        print("  " + "".join(cells))

# Print the significance marks for the McNemar results held in df_result, in bin_order column order.
print_bin_significance(df_result, bin_order)


In [None]:
import re
import pandas as pd  # 念のため

# --- Bin order (columns of the table) ---
bin_cols = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", "(16, ∞)"]
# Internal backbone identifier -> name shown in the table.
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage",
}
# Row order of the table: the display names in the insertion order of the mapping above.
backbone_order = list(backbone_name_map.values())

# --- Replace \textbf{...} emphasis with a trailing $^\star$ marker ---
def convert_textbf_to_star(cell):
    r"""Turn a bold ``\textbf{acc} (delta)`` cell into ``acc (delta)$^\star$``.

    Non-string cells are returned untouched.  A cell that does not have the
    ``\textbf{number} (delta)`` shape only has any ``\textbf{...}`` wrapper
    removed and surrounding whitespace stripped, so a cell that already carries
    the ``$^\star$`` marker is returned unchanged (the function is idempotent).

    The previous version also ran a star-stripping substitution whose pattern was
    malformed (``\\\star`` is parsed as backslash + ``\s`` + ``tar``) and therefore
    never matched a real marker; it has been dropped, which also keeps this
    definition in agreement with the later one in this notebook.
    """
    if not isinstance(cell, str):
        return cell
    # Pattern: \textbf{XX.X} (+Y.Y)
    m = re.match(r"\\textbf{([\d.]+)}\s+\(([-+.\d]+)\)", cell)
    if m:
        return f"{m.group(1)} ({m.group(2)})$^\\star$"
    # Fallback: just remove the \textbf wrapper.
    cell = re.sub(r"\\textbf{([^}]*)}", r"\1", cell)
    return cell.strip()

# --- Select: DRGG → DRGGBBoxEmbTFEnc with beam width 20 ---
df_bebs = df_pivot[
    (df_pivot["Method A → B"] == "DRGG → DRGGBBoxEmbTFEnc") &
    (df_pivot["Beam Width"] == 20)
].copy()
df_bebs["BackboneDisplay"] = df_bebs["Backbone"].map(backbone_name_map)

# --- Convert the significance formatting of every bin column ---
for b in bin_cols:
    df_bebs[b] = df_bebs[b].apply(convert_textbf_to_star)

# --- Sort the rows into display order ---
df_bebs["BackboneDisplay"] = pd.Categorical(df_bebs["BackboneDisplay"], categories=backbone_order, ordered=True)
df_bebs = df_bebs.sort_values("BackboneDisplay")

# --- LaTeX output ---
lines = []
lines.append(r"\begin{tabular}{l|rrrrrr}")
lines.append(r"\toprule")
lines.append(r"Backbone & " + " & ".join(bin_cols) + r" \\")
lines.append(r"\midrule")

for _, row in df_bebs.iterrows():
    # The pivot leaves NaN where a configuration has no edges in a bin; render such
    # cells as empty text so that str.join does not fail on a float.
    vals = ["" if pd.isna(row[b]) else str(row[b]) for b in bin_cols]
    line = f"{row['BackboneDisplay']:<15} & " + " & ".join(vals) + r" \\"
    lines.append(line)

lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")

# --- Print the table ---
print("\n".join(lines))


In [None]:
import re
from itertools import groupby

# Display order and labels
bin_cols = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", "(16, ∞)"]
# Internal backbone identifier -> name shown in the table.
backbone_name_map = {
    "r50_4scale": "ResNet-50",
    "vitdet_base_4scale": "ViT",
    "swin_base_384_4scale": "Swin",
    "dit_base": "DiT",
    "internimage_base_4scale": "InternImage",
}
# Row order of the table: the display names in the insertion order of the mapping above.
backbone_order = list(backbone_name_map.values())
# Beam width -> decoder name shown in the "Decoder" column.
decoder_map = {1: "DRGG-BE", 20: "DRGG-BEBS"}

# Convert \textbf emphasis into a $^\star$ marker
def convert_textbf_to_star(cell):
    r"""Rewrite a ``\textbf{number} (delta)`` cell as ``number (delta)$^\star$``.

    Non-string cells pass through unchanged.  Strings without that exact shape
    only lose any ``\textbf{...}`` wrapper and surrounding whitespace.
    """
    if not isinstance(cell, str):
        return cell
    bold_match = re.match(r"\\textbf{([\d.]+)} \(([-+.\d]+)\)", cell)
    if bold_match is None:
        unwrapped = re.sub(r"\\textbf{([^}]*)}", r"\1", cell)
        return unwrapped.strip()
    value, delta = bold_match.groups()
    return f"{value} ({delta})$^\\star$"

# Pre-processing: add the display name of each backbone and convert the emphasis markers.
df_pivot["BackboneDisplay"] = df_pivot["Backbone"].map(backbone_name_map)
for b in bin_cols:
    if b in df_pivot.columns:
        df_pivot[b] = df_pivot[b].apply(convert_textbf_to_star)

# Collect one table row per (backbone, decoder) for DRGG → DRGGBBoxEmbTFEnc.
# Kept in its own variable: the global `records` holds the per-edge records that the
# binning / McNemar cells read, and must not be overwritten here.
table_rows = []
for backbone in backbone_order:
    for beam_width in [1, 20]:
        decoder = decoder_map[beam_width]
        row = df_pivot[
            (df_pivot["BackboneDisplay"] == backbone) &
            (df_pivot["Method A → B"] == "DRGG → DRGGBBoxEmbTFEnc") &
            (df_pivot["Beam Width"] == beam_width)
        ]
        if row.empty:
            continue
        rec = {"Backbone": backbone, "Decoder": decoder}
        for b in bin_cols:
            value = row.iloc[0][b]
            # The pivot leaves NaN where a configuration has no edges in a bin;
            # store empty text so the join below only ever sees strings.
            rec[b] = "" if pd.isna(value) else str(value)
        table_rows.append(rec)

# LaTeX output
lines = []
lines.append(r"\begin{tabular}{ll|rrrrrr}")
lines.append(r"\toprule")
lines.append(r"Backbone & Decoder & " + " & ".join(bin_cols) + r" \\")
lines.append(r"\midrule")

for backbone, group in groupby(table_rows, key=lambda x: x["Backbone"]):
    group = list(group)
    for i, row in enumerate(group):
        vals = [row[b] for b in bin_cols]
        if i == 0:
            # Span exactly as many rows as this backbone has (one per available decoder).
            lines.append(rf"\multirow{{{len(group)}}}{{*}}{{{backbone}}} & {row['Decoder']} & " + " & ".join(vals) + r" \\")
        else:
            lines.append(rf"    & {row['Decoder']} & " + " & ".join(vals) + r" \\")
    # Separate backbones with a rule, but not after the last backbone actually
    # present (which may differ from backbone_order[-1] when data is missing).
    if backbone != table_rows[-1]["Backbone"]:
        lines.append(r"\midrule")
lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")

# Print the table
print("\n".join(lines))


In [None]:
from collections import defaultdict, Counter
import pandas as pd
import numpy as np

# Bin definition (+ "Root")
bins = [0, 1, 2, 4, 8, 16, np.inf]
# The last entry, "Root", is not produced by pd.cut; only labels[:-1] is passed to it below.
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)", "Root"]

# backbone -> {GT bin -> Counter of predicted bins}; filled by the loop further down.
bin_confusions_per_backbone = {b: defaultdict(Counter) for b in backbones}
# Only this method / beam width combination is analysed in this cell.
method = "DRGGBBoxEmbTFEnc"
beam_width = 20

def truncate_tree_at_node(root: TreeNode, stop_label: str) -> TreeNode | None:
    """Copy the tree in depth-first order up to, but excluding, ``stop_label``.

    Nodes are visited parent first, children in list order.  As soon as a node
    whose ``label`` equals ``stop_label`` is reached, the traversal ends: that
    node and everything that would have been visited after it are left out.
    Returns the copied root, or ``None`` when the root itself is the stop node.
    """
    state = {"reached_stop": False}

    def copy_until_stop(source: TreeNode) -> TreeNode | None:
        if state["reached_stop"]:
            return None
        if source.label == stop_label:
            state["reached_stop"] = True
            return None

        clone = TreeNode(source.id, source.label, source.bbox, source.category)
        for sub in source.children:
            # Once the stop node was seen, later siblings must not be copied.
            if state["reached_stop"]:
                break
            sub_clone = copy_until_stop(sub)
            if sub_clone:
                clone.children.append(sub_clone)
        return clone

    return copy_until_stop(root)

def get_rightmost_path(root):
    """Return the ``id`` of every node on the path root → last child → ... → leaf.

    An empty list is returned for a falsy ``root``.
    """
    ids = []
    current = root
    while current:
        ids.append(current.id)
        if not current.children:
            break
        current = current.children[-1]
    return ids

def _edge_distance_bin(parent, child):
    """Return the distance-bin label of a parent → child edge.

    "Root" is returned when the parent has category -1, when the normalizing
    extent is zero, or when the ratio is not finite.  Otherwise the offset between
    the two points returned by ``bbox_center`` along the dominant axis is divided
    by the larger box extent on that axis and binned with the module-level
    ``bins`` / ``labels`` (a value outside every bin yields NaN from ``pd.cut``).
    """
    if parent.category == -1:
        return "Root"

    x1, y1 = bbox_center(parent.bbox)
    x2, y2 = bbox_center(child.bbox)
    dx, dy = abs(x2 - x1), abs(y2 - y1)

    if dx >= dy:
        dist = dx
        scale = max(parent.bbox[2] - parent.bbox[0], child.bbox[2] - child.bbox[0])
    else:
        dist = dy
        scale = max(parent.bbox[3] - parent.bbox[1], child.bbox[3] - child.bbox[1])

    if scale == 0 or not np.isfinite(dist / scale):
        return "Root"
    return pd.cut([dist / scale], bins=bins, labels=labels[:-1])[0]


correct_counts_per_backbone = defaultdict(int)
mispred_counts_per_backbone = defaultdict(int)
# backbone -> [in_frontier, in_tree, not_built]
parent_status_counts = defaultdict(lambda: [0, 0, 0])

for backbone in backbones:
    key = (backbone, method, beam_width)
    if key not in all_preds_dict:
        print(f"[WARN] Missing: {key}")
        continue

    preds = all_preds_dict[key]

    for entry in preds:
        gt = entry["gt_tree"]
        pred = entry["pred_tree"]

        gt_edges = get_edges(gt, skip_root=False)
        pred_edges = get_edges(pred, skip_root=False)

        gt_nodes = build_node_dict(gt)
        pred_nodes = build_node_dict(pred)

        # child -> predicted parent
        pred_parent_map = {cid: pid for pid, cid in pred_edges}

        for pid, cid in gt_edges:
            if pid not in gt_nodes or cid not in gt_nodes:
                continue
            if cid not in pred_parent_map:
                continue

            # --- Correct parent: only counted, no distance analysis needed ---
            pred_pid = pred_parent_map[cid]
            if pred_pid == pid:
                correct_counts_per_backbone[backbone] += 1
                continue

            # --- Distance bins are only computed for mispredicted edges; binning
            #     every correct edge as well would be wasted work ---
            bin_gt = _edge_distance_bin(gt_nodes[pid], gt_nodes[cid])
            if pred_pid not in pred_nodes:
                bin_pred = "Root"
            else:
                bin_pred = _edge_distance_bin(pred_nodes[pred_pid], pred_nodes[cid])

            # --- Tally ---
            bin_confusions_per_backbone[backbone][bin_gt][bin_pred] += 1
            mispred_counts_per_backbone[backbone] += 1

            # Predicted tree as it looked just before the child was attached.
            partial_tree = truncate_tree_at_node(pred, pred_nodes[cid].label)
            if not partial_tree:
                parent_status_counts[backbone][2] += 1
                continue

            # Labels on the rightmost path of the partial tree.  Labels (not node ids)
            # are collected because the membership tests below are made with the
            # label of the GT parent, exactly like all_labels_in_tree.
            rightmost_path = set()
            frontier_node = partial_tree
            while frontier_node:
                rightmost_path.add(frontier_node.label)
                frontier_node = frontier_node.children[-1] if frontier_node.children else None
            all_labels_in_tree = set(n.label for n in dfs_all_nodes(partial_tree))

            gt_parent_label = gt_nodes[pid].label
            if gt_parent_label in rightmost_path:
                parent_status_counts[backbone][0] += 1  # GT parent was on the right frontier
            elif gt_parent_label in all_labels_in_tree:
                parent_status_counts[backbone][1] += 1  # already built, but not on the frontier
            else:
                parent_status_counts[backbone][2] += 1  # not built yet (reading-order conflict)

# Report, per backbone, mispred / (correct + mispred) as a percentage.
print("\n=== mispredicted距離割合（backboneごと）===")
mispred_rates = []
for backbone in backbones:
    correct = correct_counts_per_backbone[backbone]
    mispred = mispred_counts_per_backbone[backbone]
    total = correct + mispred
    if total > 0:
        rate = 100 * mispred / total
        print(f"[{backbone}] {rate:.2f}%  ({mispred} / {total})")
        mispred_rates.append(rate)
    else:
        print(f"[{backbone}] データなし")

# The summary lines now say 距離 (distance) like the header above; they used to
# carry the 方向 (direction) wording left over from the direction analysis.
if mispred_rates:
    print(f"\n=== mispredicted距離割合（backbone平均）: {sum(mispred_rates)/len(mispred_rates):.2f}% ===")
else:
    print("\n=== mispredicted距離割合を計算できません（全backboneでデータなし） ===")

print("\n=== 正解親の構築時点の分類（backboneごと） ===")
avg_ratios = [0, 0, 0]  # [in_frontier, in_tree, not_built]
valid_backbones = 0
for backbone, (in_frontier, in_tree, not_built) in parent_status_counts.items():
    total = in_frontier + in_tree + not_built
    if total == 0:
        continue
    r1 = in_frontier / total * 100
    r2 = in_tree / total * 100
    r3 = not_built / total * 100
    print(f"[{backbone}]")
    print(f"  1. rightmost path に存在       : {in_frontier} ({r1:.2f}%)")
    print(f"  2. rightmost path 以外の木に存在: {in_tree} ({r2:.2f}%)")
    print(f"  3. 正解の親が子より後ろの読み順  : {not_built} ({r3:.2f}%)")
    avg_ratios[0] += r1
    avg_ratios[1] += r2
    avg_ratios[2] += r3
    valid_backbones += 1

# Mean of the per-backbone percentages (not weighted by the number of edges).
if valid_backbones > 0:
    print("\n=== 分類割合（backbone平均） ===")
    print(f"  1. rightmost path に存在       : {avg_ratios[0]/valid_backbones:.2f}%")
    print(f"  2. rightmost path 以外の木に存在: {avg_ratios[1]/valid_backbones:.2f}%")
    print(f"  3. 正解の親が子より後ろの読み順  : {avg_ratios[2]/valid_backbones:.2f}%")
else:
    print("\n=== データなし：backbone 平均は計算不可 ===")

In [None]:
import numpy as np
import pandas as pd

# Rows and columns are always laid out in this bin-label order.
labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)", "Root"]

normalized_dfs = []

for counts_by_gt_bin in bin_confusions_per_backbone.values():
    # Nested counters -> square table (GT bin x predicted bin), missing cells as 0.
    counts_table = (
        pd.DataFrame.from_dict(counts_by_gt_bin, orient="index", columns=labels)
        .reindex(index=labels, columns=labels)
        .fillna(0)
        .astype(float)
    )

    # Share of each predicted bin within its GT bin; rows without any count become NaN.
    shares = counts_table.div(counts_table.sum(axis=1), axis=0)
    normalized_dfs.append(shares.replace([np.inf, -np.inf], np.nan))

# Cell-wise mean over the backbones, ignoring NaN entries.
stack = np.stack([table.to_numpy() for table in normalized_dfs])
mean_array = np.nanmean(stack, axis=0)
mean_bin_confusion = pd.DataFrame(mean_array, index=labels, columns=labels)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the backbone-averaged distance-bin confusion table.
plt.figure(figsize=(10, 3.5))
heatmap_options = dict(
    cmap="Blues",
    cbar=True,
    xticklabels=labels,
    yticklabels=labels,
)
ax = sns.heatmap(mean_bin_confusion, **heatmap_options)

# The colorbar is attached to the first collection drawn by the heatmap.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)

plt.xlabel("Distance of Mispredicted BBox", fontsize=18)
plt.ylabel("Distance of GT BBox", fontsize=18)
plt.xticks(rotation=0, fontsize=18)
plt.yticks(rotation=0, fontsize=18)
plt.tight_layout()

# Save a raster and a vector version of the same figure.
for figure_path, save_options in (
    ("./figures/parent_child_error_distance.png", {}),
    ("./figures/parent_child_error_distance.eps", {"format": "eps"}),
):
    plt.savefig(figure_path, dpi=300, bbox_inches="tight", **save_options)
plt.show()


In [None]:
# Category id -> category name.  Both 9 and -1 are mapped to "Root".
id2category = {
    0: "Author_Info",
    1: "Title",
    2: "Figure",
    3: "Text",
    4: "List",
    5: "Section",
    6: "Caption",
    7: "Table",
    8: "Unknown",
    9: "Root",
   -1: "Root"
}

In [None]:
from collections import defaultdict
import pandas as pd

# ----- Label -> node lookup -----
def build_node_dict(node):
    """Map every node's ``label`` to the node, for the whole tree under ``node``.

    Nodes are visited parent first, children in list order; when a label occurs
    more than once, the node visited last is the one kept.
    """
    by_label = {}
    pending = [node]
    while pending:
        current = pending.pop()
        by_label[current.label] = current
        # Push in reverse so that the first child is processed next.
        pending.extend(reversed(current.children))
    return by_label

# ----- Parameters -----
backbones = ["r50_4scale", "vitdet_base_4scale", "swin_base_384_4scale", "internimage_base_4scale", "dit_base"]
methods = [("DRGG", "DRGGBBoxEmbTFEnc")]
beam_widths = [1, 20]

# ----- Aggregated counters -----
# (backbone, method A, method B, beam width, "parent → child" category pair) -> counters
category_stats = defaultdict(lambda: {"total": 0, "correct_A": 0, "correct_B": 0})

for backbone, (method_A, method_B), beam_width in itertools.product(backbones, methods, beam_widths):
    key_A = (backbone, method_A, beam_width)
    key_B = (backbone, method_B, beam_width)

    if not (key_A in all_preds_dict and key_B in all_preds_dict):
        print(f"[WARN] Missing: {key_A} or {key_B}")
        continue

    preds_A = all_preds_dict[key_A]
    preds_B = all_preds_dict[key_B]
    map_A = {p["file_name"]: p for p in preds_A}
    map_B = {p["file_name"]: p for p in preds_B}
    common_files = set(map_A) & set(map_B)

    for fname in sorted(common_files):
        entry_A, entry_B = map_A[fname], map_B[fname]
        gt = entry_A["gt_tree"]
        pred_A = entry_A["pred_tree"]
        pred_B = entry_B["pred_tree"]

        node_dict = build_node_dict(gt)
        gt_edges = get_edges(gt, skip_root=False)
        pred_edges_A = set(get_edges(pred_A, skip_root=False))
        pred_edges_B = set(get_edges(pred_B, skip_root=False))

        for pid, cid in gt_edges:
            if pid not in node_dict or cid not in node_dict:
                continue
            parent, child = node_dict[pid], node_dict[cid]
            pair = f"{id2category[parent.category]} → {id2category[child.category]}"
            stats = category_stats[(backbone, method_A, method_B, beam_width, pair)]
            stats["total"] += 1
            stats["correct_A"] += int((pid, cid) in pred_edges_A)
            stats["correct_B"] += int((pid, cid) in pred_edges_B)

# ----- Formatting -----
records = []
for stat_key, stat in category_stats.items():
    backbone, method_A, method_B, beam_width, pair = stat_key
    total = stat["total"]
    if total:
        acc_A = stat["correct_A"] / total * 100
        acc_B = stat["correct_B"] / total * 100
    else:
        acc_A = 0
        acc_B = 0
    diff = acc_B - acc_A
    records.append({
        "Backbone": backbone,
        "Method A → B": f"{method_A} → {method_B}",
        "Beam Width": beam_width,
        "Parent → Child": pair,
        "Count": total,
        "Acc A (%)": round(acc_A, 2),
        "Acc B (%)": round(acc_B, 2),
        "Diff (%)": round(diff, 2)
    })

# Largest improvement first within each (backbone, beam width).
df_cat = pd.DataFrame(records).sort_values(
    by=["Backbone", "Beam Width", "Diff (%)"],
    ascending=[True, True, False],
)

# # ----- Display -----
# pd.set_option("display.max_rows", 100)
# print(df_cat.to_string(index=False))


In [None]:
N = 5  # number of top rows emitted per group
latex_tables = []

group_cols = ["Backbone", "Method A → B", "Beam Width"]

# Extract the N most improved parent → child relations of every group.
for key, group in df_cat.groupby(group_cols):
    topn = group.sort_values(by="Diff (%)", ascending=False).head(N)
    caption = f"Top-{N} improved parent-child relations for {key[0]}, {key[1]}, Beam Width={key[2]}"
    # pandas does not escape the caption, and backbone names such as "r50_4scale"
    # contain "_", which is not valid in LaTeX text mode.
    caption = caption.replace("_", r"\_")
    label = f"tab:top{N}_{key[0]}_{key[1].replace(' ', '')}_bw{key[2]}".lower().replace("→", "to").replace("_", "")

    latex = topn.to_latex(
        index=False,
        columns=["Parent → Child", "Count", "Acc A (%)", "Acc B (%)", "Diff (%)"],
        float_format="%.2f",
        caption=caption,
        label=label,
        # No cell of this table carries raw LaTeX, so let pandas escape the special
        # characters: the "%" in the headers would otherwise start a LaTeX comment
        # and the "_" in names such as "Author_Info" would break text mode.
        escape=True,
        column_format="lrrrr"
    )
    latex_tables.append(latex)

# Print the tables
for t in latex_tables:
    print("\n" + "="*40 + "\n")
    print(t)


In [None]:
from collections import defaultdict, Counter
import pandas as pd

# Only this method / beam width combination is analysed in this cell.
method = "DRGGBBoxEmbTFEnc"
beam_width = 20
# backbone -> confusion counts (GT parent category -> predicted parent category); filled by the loop below.
parent_confusions_per_backbone = {}

def truncate_tree_at_node(root: TreeNode, stop_label: str) -> TreeNode | None:
    """Copy the tree in depth-first order up to, but excluding, ``stop_label``.

    Nodes are visited parent first, children in list order.  As soon as a node
    whose ``label`` equals ``stop_label`` is reached, the traversal ends: that
    node and everything that would have been visited after it are left out.
    Returns the copied root, or ``None`` when the root itself is the stop node.
    """
    state = {"reached_stop": False}

    def copy_until_stop(source: TreeNode) -> TreeNode | None:
        if state["reached_stop"]:
            return None
        if source.label == stop_label:
            state["reached_stop"] = True
            return None

        clone = TreeNode(source.id, source.label, source.bbox, source.category)
        for sub in source.children:
            # Once the stop node was seen, later siblings must not be copied.
            if state["reached_stop"]:
                break
            sub_clone = copy_until_stop(sub)
            if sub_clone:
                clone.children.append(sub_clone)
        return clone

    return copy_until_stop(root)

def get_rightmost_path(root):
    """Return the ``id`` of every node on the path root → last child → ... → leaf.

    An empty list is returned for a falsy ``root``.
    """
    ids = []
    current = root
    while current:
        ids.append(current.id)
        if not current.children:
            break
        current = current.children[-1]
    return ids

correct_counts_per_backbone = defaultdict(int)
mispred_counts_per_backbone = defaultdict(int)
# backbone -> [in_frontier, in_tree, not_built]
parent_status_counts = defaultdict(lambda: [0, 0, 0])

for backbone in backbones:
    key = (backbone, method, beam_width)
    if key not in all_preds_dict:
        print(f"[WARN] Missing: {key}")
        continue

    preds = all_preds_dict[key]
    # GT parent category -> Counter of predicted parent categories
    confusion = defaultdict(Counter)

    for entry in preds:
        gt = entry["gt_tree"]
        pred = entry["pred_tree"]

        gt_edges = get_edges(gt, skip_root=False)
        pred_edges = get_edges(pred, skip_root=False)

        gt_nodes = build_node_dict(gt)
        pred_nodes = build_node_dict(pred)

        # --- GT: child -> parent map (label based) ---
        gt_parents = {cid: pid for pid, cid in gt_edges}

        # --- Pred: child -> parent map (label based) ---
        pred_parents = {cid: pid for pid, cid in pred_edges}

        for cid_label, pid_gt_label in gt_parents.items():
            if cid_label not in pred_parents:
                continue  # the child node does not occur in the prediction

            pid_pred_label = pred_parents[cid_label]
            if pid_gt_label == pid_pred_label:
                correct_counts_per_backbone[backbone] += 1
                continue  # correct parent, nothing to analyse

            # Node consistency check (disabled):
            # if pid_gt_label not in gt_nodes or pid_pred_label not in pred_nodes:
            #     continue

            cat_gt = id2category[gt_nodes[pid_gt_label].category]
            cat_pred = id2category[pred_nodes[pid_pred_label].category]

            confusion[cat_gt][cat_pred] += 1
            mispred_counts_per_backbone[backbone] += 1

            # Predicted tree as it looked just before the child was attached.
            partial_tree = truncate_tree_at_node(pred, cid_label)
            if not partial_tree:
                parent_status_counts[backbone][2] += 1
                continue

            # Labels on the rightmost path of the partial tree.  Labels (not node ids)
            # are collected because the membership tests below are made with the
            # label of the GT parent, exactly like all_labels_in_tree.
            rightmost_path = set()
            frontier_node = partial_tree
            while frontier_node:
                rightmost_path.add(frontier_node.label)
                frontier_node = frontier_node.children[-1] if frontier_node.children else None
            all_labels_in_tree = set(n.label for n in dfs_all_nodes(partial_tree))

            # # Restrict the tally to GT "Section" / predicted "Root" (disabled):
            # if not(cat_gt == "Section" and cat_pred == "Root"):
            #     continue

            if pid_gt_label in rightmost_path:
                parent_status_counts[backbone][0] += 1  # GT parent was on the right frontier
            elif pid_gt_label in all_labels_in_tree:
                parent_status_counts[backbone][1] += 1  # already built, but not on the frontier
            else:
                parent_status_counts[backbone][2] += 1  # not built yet (reading-order conflict)

    parent_confusions_per_backbone[backbone] = confusion

# Display order (display spelling of the category names)
category_order = [
    "Root", "Title", "Author Info", "Section",
    "Text", "List", "Figure", "Table", "Caption"
]

# Every category that occurs as a GT or as a predicted parent
all_cats = sorted(set(
    k for conf in parent_confusions_per_backbone.values() for k in conf
).union(
    k2 for conf in parent_confusions_per_backbone.values() for v in conf.values() for k2 in v
))

# Row-normalize per backbone, then average over the backbones
normalized_dfs = []
for backbone in backbones:
    conf = parent_confusions_per_backbone.get(backbone, {})
    df = pd.DataFrame.from_dict(conf, orient="index", columns=all_cats).fillna(0).astype(int)
    df = df.reindex(index=all_cats, columns=all_cats).fillna(0)
    df_norm = df.div(df.sum(axis=1), axis=0).fillna(0)
    normalized_dfs.append(df_norm)

mean_confusion = sum(normalized_dfs) / len(normalized_dfs)

# Drop all-zero rows and columns
nonzero_rows = mean_confusion.sum(axis=1) > 0
nonzero_cols = mean_confusion.sum(axis=0) > 0
mean_confusion = mean_confusion.loc[nonzero_rows, nonzero_cols]

# id2category spells the category "Author_Info" while category_order uses the
# display spelling "Author Info"; rename first so that the category is not
# silently dropped by the ordering filter below.
display_names = {"Author_Info": "Author Info"}
mean_confusion = mean_confusion.rename(index=display_names, columns=display_names)

# Reorder into display order (categories that are present only).
# NOTE(review): categories missing from category_order (e.g. "Unknown") are left
# out of the table here — confirm that this is intended.
present_rows = [cat for cat in category_order if cat in mean_confusion.index]
present_cols = [cat for cat in category_order if cat in mean_confusion.columns]
mean_confusion = mean_confusion.reindex(index=present_rows, columns=present_cols)

# Report, per backbone, mispred / (correct + mispred) as a percentage.
print("\n=== mispredictedカテゴリー割合（backboneごと）===")
mispred_rates = []
for backbone in backbones:
    correct = correct_counts_per_backbone[backbone]
    mispred = mispred_counts_per_backbone[backbone]
    total = correct + mispred
    if total > 0:
        rate = 100 * mispred / total
        print(f"[{backbone}] {rate:.2f}%  ({mispred} / {total})")
        mispred_rates.append(rate)
    else:
        print(f"[{backbone}] データなし")

# The summary lines now say カテゴリー (category) like the header above; they used
# to carry the 方向 (direction) wording left over from the direction analysis.
if mispred_rates:
    print(f"\n=== mispredictedカテゴリー割合（backbone平均）: {sum(mispred_rates)/len(mispred_rates):.2f}% ===")
else:
    print("\n=== mispredictedカテゴリー割合を計算できません（全backboneでデータなし） ===")

print("\n=== 正解親の構築時点の分類（backboneごと） ===")
avg_ratios = [0, 0, 0]  # [in_frontier, in_tree, not_built]
valid_backbones = 0
for backbone, (in_frontier, in_tree, not_built) in parent_status_counts.items():
    total = in_frontier + in_tree + not_built
    if total == 0:
        continue
    r1 = in_frontier / total * 100
    r2 = in_tree / total * 100
    r3 = not_built / total * 100
    print(f"[{backbone}]")
    print(f"  1. rightmost path に存在       : {in_frontier} ({r1:.2f}%)")
    print(f"  2. rightmost path 以外の木に存在: {in_tree} ({r2:.2f}%)")
    print(f"  3. 正解の親が子より後ろの読み順  : {not_built} ({r3:.2f}%)")
    avg_ratios[0] += r1
    avg_ratios[1] += r2
    avg_ratios[2] += r3
    valid_backbones += 1

# Mean of the per-backbone percentages (not weighted by the number of edges).
if valid_backbones > 0:
    print("\n=== 分類割合（backbone平均） ===")
    print(f"  1. rightmost path に存在       : {avg_ratios[0]/valid_backbones:.2f}%")
    print(f"  2. rightmost path 以外の木に存在: {avg_ratios[1]/valid_backbones:.2f}%")
    print(f"  3. 正解の親が子より後ろの読み順  : {avg_ratios[2]/valid_backbones:.2f}%")
else:
    print("\n=== データなし：backbone 平均は計算不可 ===")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the backbone-averaged parent-category confusion table.
plt.figure(figsize=(10, 2.5))
heatmap_options = dict(
    cmap="Blues",
    cbar=True,
    xticklabels=mean_confusion.columns,
    yticklabels=mean_confusion.index,
)
ax = sns.heatmap(mean_confusion, **heatmap_options)

# The colorbar is attached to the first collection drawn by the heatmap.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)

plt.xlabel("Category of Mispredicted BBox", fontsize=18)
plt.ylabel("Category\nof GT BBox", fontsize=18)
plt.xticks(rotation=0, fontsize=18)
plt.yticks(rotation=0, fontsize=18)
plt.tight_layout()

# Save a raster and a vector version of the same figure.
for figure_path, save_options in (
    ("./figures/parent_child_error_category.png", {}),
    ("./figures/parent_child_error_category.eps", {"format": "eps"}),
):
    plt.savefig(figure_path, dpi=300, bbox_inches="tight", **save_options)
plt.show()
