In [None]:
import os
import json
import itertools
import numpy as np
from collections import defaultdict, Counter
from pathlib import Path
import torch
import matplotlib.pyplot as plt
from collections import deque

from detectron2.utils import comm
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import pairwise_iou
from scipy.optimize import linear_sum_assignment
from apted import APTED, Config
import pandas as pd

In [None]:
# Make sure the directory used for saved figures exists (no error if it already does).
os.makedirs("./figures", exist_ok=True)

In [None]:
from collections import defaultdict, Counter
import os
import json

# Directory that is scanned for DocHieNet label files (*.json).
labels_dir = "/scipostlayout/scipostlayout/DocHieNet/dochienet_dataset/labels/"

# COCO-style output lists.
images = []
annotations = []
categories = []

# Category ids are handed out in order of first appearance, starting at 1.
category_name_to_id = {}
category_id_to_name = {}
next_category_id = 1
next_annotation_id = 1

# image_id -> {DocHieNet element id -> annotation id}; used in step 2 to resolve parents.
doc_id_to_ann_id = defaultdict(dict)
# Annotations that still carry the unresolved "raw_parent_id" field.
pending_annotations = []

# --- 1. Load the label files and build provisional annotations ---
for fname in sorted(os.listdir(labels_dir)):
    if not fname.endswith(".json"):
        continue

    fpath = os.path.join(labels_dir, fname)
    with open(fpath, "r", encoding="utf-8") as f:
        data = json.load(f)

    base_name = os.path.splitext(fname)[0]

    # One image entry per page; the page key has its "page" substring removed and is parsed as int.
    for page_key, page_info in data.get("pages", {}).items():
        page_num = int(page_key.replace("page", ""))
        image_id = f"{base_name}_page{page_num}"
        images.append({
            "id": image_id,
            "file_name": f"{image_id}.jpg",
            "width": page_info["width"],
            "height": page_info["height"]
        })

    for item in data.get("contents", []):
        # The label box is read as corner coordinates and stored as [x, y, width, height].
        x1, y1, x2, y2 = item["box"]
        w, h = x2 - x1, y2 - y1
        image_id = f"{base_name}_page{item['page']}"
        label = item["label"]

        # Register a category the first time its label is seen.
        if label not in category_name_to_id:
            cid = next_category_id
            category_name_to_id[label] = cid
            category_id_to_name[cid] = label
            categories.append({"id": cid, "name": label})
            next_category_id += 1

        doc_id = item["id"]
        order = item.get("order", None)
        # The first element of the first "linking" entry is taken as the parent id.
        # NOTE(review): presumably each entry is a (parent, child) pair -- confirm against the dataset spec.
        raw_parent = item["linking"][0][0] if item.get("linking") else None
        # A raw parent id of 0 is treated the same as "no parent".
        if raw_parent == 0:
            raw_parent = None

        ann = {
            "id": next_annotation_id,
            "image_id": image_id,
            "category_id": category_name_to_id[label],
            "category_name": label,
            "bbox": [x1, y1, w, h],
            "area": w * h,
            "iscrowd": 0,
            "docchienet_id": doc_id,
            "raw_parent_id": raw_parent,
            "order": order
        }

        doc_id_to_ann_id[image_id][doc_id] = next_annotation_id
        pending_annotations.append(ann)
        next_annotation_id += 1

# --- 2. Resolve parent_id ---
# The raw DocHieNet parent id is translated to an annotation id, looked up only
# among elements of the same image; a parent that is not found there becomes None.
annotations = []
for ann in pending_annotations:
    raw_pid = ann.pop("raw_parent_id")
    image_id = ann["image_id"]
    if raw_pid is not None and raw_pid in doc_id_to_ann_id[image_id]:
        ann["parent_id"] = doc_id_to_ann_id[image_id][raw_pid]
    else:
        ann["parent_id"] = None
    annotations.append(ann)

# --- 3. Drop images that have at most one annotation ---
# Number of annotations per image id.
image_id_to_ann_count = Counter(ann["image_id"] for ann in annotations)

excluded_image_ids = {img_id for img_id, count in image_id_to_ann_count.items() if count <= 1}
# Images that never occur in `annotations` have an implicit count of 0 and are
# therefore missing from the counter; add them explicitly so that they are
# excluded too (Counter lookups of unknown keys return 0 without inserting).
excluded_image_ids.update(
    img["id"] for img in images if image_id_to_ann_count[img["id"]] == 0
)
print(f"除外対象画像数（アノテーション1件以下）: {len(excluded_image_ids)}")

annotations = [ann for ann in annotations if ann["image_id"] not in excluded_image_ids]
images = [img for img in images if img["id"] not in excluded_image_ids]

# Also discard annotations that refer to an image with no entry in `images`.
valid_image_ids = {img["id"] for img in images}
annotations = [ann for ann in annotations if ann["image_id"] in valid_image_ids]

# --- 4. Add a Root node per image ---
# Only images that still have at least two annotations receive a Root node.
image_id_to_ann_count = Counter(ann["image_id"] for ann in annotations)
eligible_image_ids_for_root = sorted(
    [img_id for img_id, count in image_id_to_ann_count.items() if count >= 2]
)

image_id_to_root_id = {}
for image_id in eligible_image_ids_for_root:
    # Root ids continue the annotation id sequence started in step 1.
    root_id = next_annotation_id
    root_ann = {
        "id": root_id,
        "image_id": image_id,
        "category_id": -1,
        "category_name": "Root",
        "bbox": [0, 0, 0, 0],
        "area": 0,
        "iscrowd": 0,
        "docchienet_id": None,
        "parents": [],
        "parent_id": None,
        "priority": -1,
    }
    annotations.append(root_ann)
    image_id_to_root_id[image_id] = root_id
    next_annotation_id += 1

# --- 5. Remove self-loops ---
for ann in annotations:
    if ann.get("parent_id") == ann["id"]:
        ann["parent_id"] = None

# --- 6. Attach every parentless node to the Root of its image ---
for ann in annotations:
    if ann["parent_id"] is None and ann["category_name"] != "Root":
        ann["parent_id"] = image_id_to_root_id.get(ann["image_id"])

# --- 7. Derive `parents` and the parent -> children map from parent_id ---
id_to_ann = {ann["id"]: ann for ann in annotations}
parent_to_children = defaultdict(list)
for ann in annotations:
    if ann["parent_id"] is not None:
        # Each node has at most one parent, stored as a one-element list.
        ann["parents"] = [ann["parent_id"]]
        parent_to_children[ann["parent_id"]].append(ann)
    else:
        ann["parents"] = []

# --- 8. Sort siblings (direct children of Root by (y, x); all others by `order`) ---
for parent_id, children in parent_to_children.items():
    parent_ann = id_to_ann[parent_id]
    if parent_ann["category_name"] == "Root":
        children.sort(key=lambda x: (x["bbox"][1], x["bbox"][0]))  # y first, then x
    else:
        # Step 1 always stores an "order" key, possibly with the value None, so
        # a `.get("order", 9999)` default would never apply and None would be
        # compared with ints (TypeError). Map None explicitly to the fallback
        # so that nodes without an order sort last.
        children.sort(key=lambda x: 9999 if x.get("order") is None else x["order"])

# --- 9. Assign priorities (depth-first, pre-order) ---
def build_priority_assigner():
    """
    Return a function that numbers nodes in depth-first pre-order.

    The returned ``assign_priority(node_id)`` writes consecutive integers
    (starting at 0 and shared across all calls) into ``ann["priority"]`` for
    the given node and everything reachable from it through
    ``parent_to_children``. Nodes that were already numbered are skipped.

    The traversal uses an explicit stack instead of recursion so that very
    deep hierarchies cannot hit the interpreter's recursion limit.
    """
    counter = [0]
    visited = set()

    def assign_priority(node_id):
        stack = [node_id]
        while stack:
            current_id = stack.pop()
            if current_id in visited:
                continue
            visited.add(current_id)
            id_to_ann[current_id]["priority"] = counter[0]
            counter[0] += 1
            # Push children in reverse so the first child is numbered first,
            # which reproduces the order of a recursive pre-order walk.
            children = parent_to_children.get(current_id, [])
            stack.extend(child["id"] for child in reversed(children))

    return assign_priority

# NOTE(review): nodes that are not reachable from any Root (e.g. members of a
# parent cycle longer than a self-loop) never receive a "priority" key here.
assign_priority = build_priority_assigner()
for root_id in image_id_to_root_id.values():
    assign_priority(root_id)

# --- 10. Assemble the COCO-style output ---
coco_data = {
    "images": images,
    "annotations": annotations,
    "categories": categories
}


In [None]:
from collections import defaultdict

# Sanity check: no image may consist of a Root annotation only.
# image_id -> set of category names present on that image.
img_to_cats = defaultdict(set)
for ann in coco_data["annotations"]:
    img_to_cats[ann["image_id"]].add(ann["category_name"])

only_root_imgs = [img_id for img_id, cats in img_to_cats.items() if cats == {"Root"}]
print(f"✅ Rootだけの画像数（最終確認）: {len(only_root_imgs)}")
assert len(only_root_imgs) == 0, "Rootだけの画像がまだ残っています"


In [None]:
from collections import Counter, defaultdict
import numpy as np

# image id -> image record
all_images = {img["id"]: img for img in coco_data["images"]}
all_annotations = coco_data["annotations"]

# category_id -> name map (fallback when an annotation has no category_name)
category_id_to_name = {cat["id"]: cat["name"] for cat in coco_data["categories"]}

# --- Categories excluded from the statistics ---
excluded_names = {"Root", "Unknown"}

# Prefer the annotation's own category_name; fall back to category_id_to_name otherwise.
filtered_annotations = []
for ann in all_annotations:
    cat_name = ann.get("category_name") or category_id_to_name.get(ann["category_id"], "")
    if cat_name not in excluded_names:
        filtered_annotations.append(ann)

# Keep only the images that are referenced by a remaining annotation.
used_image_ids = {ann["image_id"] for ann in filtered_annotations}
filtered_images = {img_id: all_images[img_id] for img_id in used_image_ids}

# --- Number of bounding boxes per image ---
image_id_to_count = Counter()
for ann in filtered_annotations:
    image_id_to_count[ann["image_id"]] += 1

bbox_counts_per_image = list(image_id_to_count.values())
total_images = len(filtered_images)
total_annotations = len(filtered_annotations)

print(f"Images: {total_images}")
print(f"Annotations: {total_annotations}")
print(f"Mean BBoxes: {np.mean(bbox_counts_per_image):.2f}")
print(f"Std. BBoxes: {np.std(bbox_counts_per_image):.2f}")

# --- Number of bounding boxes per category ---
category_counts = Counter()
for ann in filtered_annotations:
    cat_name = ann.get("category_name") or category_id_to_name.get(ann["category_id"], f"(id={ann['category_id']})")
    category_counts[cat_name] += 1

print("\nCategory BBoxes:")
for name, count in category_counts.most_common():
    print(f"  {name:20s}: {count}")

# --- Per-category occurrences per image (mean / std over all images) ---
all_image_ids = set(filtered_images.keys())
# Every category starts with a zero count for every image, so images that do
# not contain the category still contribute a 0 to the mean and std.
cat_img_to_count = defaultdict(lambda: {img_id: 0 for img_id in all_image_ids})

for ann in filtered_annotations:
    cat_name = ann.get("category_name") or category_id_to_name.get(ann["category_id"], f"(id={ann['category_id']})")
    img_id = ann["image_id"]
    cat_img_to_count[cat_name][img_id] += 1

print("\nMean (Std.) Category BBoxes:")
for cat_name, img_counts in cat_img_to_count.items():
    values = list(img_counts.values())  # one value per image (zeros included)
    mean = np.mean(values)
    std = np.std(values)
    print(f"  {cat_name:20s}: Mean={mean:.2f}, Std.={std:.2f}")


In [None]:
N = 30  # number of top entries to list

# Images ordered by descending bounding-box count
top_images = image_id_to_count.most_common(N)

print(f"\nBBox 数が多い上位 {N} 件の画像:")
for i, (image_id, count) in enumerate(top_images, 1):
    img_info = all_images.get(image_id, {})
    fname = img_info.get("file_name", "(no name)")
    print(f"{i:2d}: image_id={image_id}, file_name={fname}, BBox 数={count}")

In [None]:
class TreeNode:
    """
    Node of a per-image layout tree.

    The constructor stores its arguments unchanged as attributes of the same
    name and starts the node with an empty ``children`` list, which is filled
    by the tree-building code after construction.
    """

    def __init__(self, id, label, bbox, category, priority):
        self.id, self.label, self.bbox = id, label, bbox
        self.category, self.priority = category, priority
        # Child TreeNode objects, appended after construction.
        self.children = list()

In [None]:
def build_tree_with_root(annotations):
    """
    Build the tree for one group of annotations and return its Root node.

    Every annotation becomes a TreeNode whose ``label`` is the annotation id.
    An annotation with a non-empty ``parents`` list is attached as a child of
    the first id in that list; annotations without parents stay unattached.
    Afterwards each node's children are ordered by ascending ``priority``.

    Returns the node of the first annotation whose ``category_name`` is
    "Root" and raises ValueError when there is none.
    """
    # annotation id -> TreeNode
    nodes = {}
    for ann in annotations:
        ann_id = ann["id"]
        nodes[ann_id] = TreeNode(
            id=ann_id,
            label=ann_id,
            bbox=ann["bbox"],
            category=ann["category_name"],
            priority=ann["priority"],
        )

    # Link children to their (single) parent.
    for ann in annotations:
        parents = ann.get("parents", [])
        if not parents:
            continue
        parent_node = nodes[parents[0]]
        parent_node.children.append(nodes[ann["id"]])

    # Order every sibling list by priority.
    for tree_node in nodes.values():
        tree_node.children.sort(key=lambda child: child.priority)

    # The first Root annotation (one per group is expected) is the tree root.
    root_ann = next((ann for ann in annotations if ann["category_name"] == "Root"), None)
    if root_ann is None:
        raise ValueError("Root ノードが見つかりませんでした。")
    return nodes[root_ann["id"]]

In [None]:
def build_trees_per_image(annotations):
    """
    Group annotations by ``image_id`` and build one tree per image.

    Images for which build_tree_with_root raises ValueError are reported
    with a printed warning and left out of the result.

    Returns: dict[image_id] = root TreeNode
    """
    grouped = defaultdict(list)
    for ann in annotations:
        grouped[ann["image_id"]].append(ann)

    roots = {}
    for image_id, image_anns in grouped.items():
        try:
            roots[image_id] = build_tree_with_root(image_anns)
        except ValueError as e:
            print(f"[警告] image_id={image_id} で木構築失敗: {e}")
            continue

    return roots

In [None]:
# Build one tree per image.
# `filtered_annotations` is rebound to a copy of *all* annotations here, so the
# Root annotations removed by the earlier statistics filter are included again.
filtered_annotations = [ann for ann in all_annotations]
image_id_to_root = build_trees_per_image(filtered_annotations)

print(f"構築された木の数: {len(image_id_to_root)}")

# Inspect the tree of the first image in the mapping.
sample_id = next(iter(image_id_to_root))
tree = image_id_to_root[sample_id]
print(f"Root ID: {tree.id}, label: {tree.label}")

In [None]:
def print_tree(node, indent=0):
    """
    Print the subtree rooted at ``node``, one line per node.

    Each line shows the node's id, priority and category and is indented by
    two spaces per level, starting at ``indent`` levels for ``node`` itself.
    Children are printed in the order of their ``children`` list.
    """
    pending = [(node, indent)]
    while pending:
        current, level = pending.pop()
        prefix = "  " * level
        print(f"{prefix}- ID: {current.id}, Priority: {current.priority}, Category: {current.category}")
        # Reverse so the first child is popped (and printed) first.
        pending.extend((child, level + 1) for child in reversed(list(current.children)))


In [None]:
# Fetch the tree of the first image in the mapping and print it.
sample_id = next(iter(image_id_to_root))
tree = image_id_to_root[sample_id]

print(f"[Tree for image_id={sample_id}]")
print_tree(tree)

In [None]:
from collections import deque, defaultdict

def compute_tree_depth_and_width(root):
    """
    Return ``(max_depth, max_width)`` of the tree rooted at ``root``.

    The root is at depth 1. The width is the largest number of nodes found
    on a single level, counted across all parents of that level.
    """
    depth = 0
    widest = 0
    level = [root]
    # Walk the tree one level at a time.
    while level:
        depth += 1
        if len(level) > widest:
            widest = len(level)
        level = [child for parent in level for child in parent.children]

    return depth, widest

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Per-tree statistics
depths = []
widths = []
image_ids = []

# (image_id, width) pairs, kept so widths can be traced back to images
image_width_list = []

for image_id, root in image_id_to_root.items():
    depth, width = compute_tree_depth_and_width(root)
    depths.append(depth)
    widths.append(width)
    image_ids.append(image_id)
    image_width_list.append((image_id, width))

# Print the statistics
print("Tree statistics:")
print(f"mean depth: {np.mean(depths):.2f}")
print(f"max depth : {np.max(depths)}")
print(f"std. depth: {np.std(depths):.2f}")
print(f"mean width: {np.mean(widths):.2f}")
print(f"max width : {np.max(widths)}")
print(f"std. width: {np.std(widths):.2f}")

# Number of children of every node, over all trees
children_counts = []

def collect_children_counts(node):
    """Append the child count of ``node`` and of all its descendants to ``children_counts``."""
    children_counts.append(len(node.children))
    for child in node.children:
        collect_children_counts(child)

# Collect over all trees
for root in image_id_to_root.values():
    collect_children_counts(root)

# Compute the statistics
mean_children = np.mean(children_counts)
# Despite its name, this variable holds the standard deviation (np.std), not the variance.
var_children = np.std(children_counts)

print(f"mean children: {mean_children:.2f}")
print(f"std. children: {var_children:.2f}")

In [None]:
# Histogram of tree depths
# Frequency of each depth value
depth_counter = Counter(depths)
x = sorted(depth_counter.keys())
y = [depth_counter[d] for d in x]

plt.figure(figsize=(6, 1.5))
plt.bar(x, y, edgecolor="black")
# plt.title("Tree Depth Distribution")
plt.xlabel("Depth")
plt.ylabel("Frequency")
plt.xticks(x)  # ticks only at the observed (integer) depths
plt.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
# Histogram of tree widths
# Frequency of each width value
width_counter = Counter(widths)
x_vals = sorted(width_counter.keys())
y_vals = [width_counter[w] for w in x_vals]

plt.figure(figsize=(6, 1.5))
plt.bar(x_vals, y_vals, edgecolor="black")
# plt.title("Tree Width Distribution")
plt.xlabel("Width")
plt.ylabel("Frequency")

# X ticks every 10, starting at the smallest width rounded down to a multiple of 5
min_x = min(x_vals)
max_x = max(x_vals)
xticks = list(range((min_x // 5) * 5, ((max_x // 5) + 1) * 5 + 1, 10))
plt.xticks(xticks)

plt.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Frequency of each branching factor (number of children per node)
branch_counter = Counter(children_counts)
x_vals = sorted(branch_counter.keys())
# Bar heights are log2 of the counts, so a value that occurs exactly once gets height 0.
y_vals = np.log2([branch_counter[x] for x in x_vals])

# Draw the histogram bars
plt.figure(figsize=(6, 1.5))
plt.bar(x_vals, y_vals, edgecolor="black")
# plt.title("Distribution of Number of Children per Node")
plt.xlabel("Number of children")
plt.ylabel("Log-scale\nFrequency")

# X ticks every 10, starting at the smallest value rounded down to a multiple of 5
min_x = min(x_vals)
max_x = max(x_vals)
xticks = list(range((min_x // 5) * 5, ((max_x // 5 + 1) * 5) + 1, 10))
plt.xticks(xticks)

plt.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
import os
import json
import numpy as np
from collections import Counter, defaultdict

# Location and file names of the SciPostLayout tree annotations (all splits).
splt_dataset_dir = "/scipostlayout/scipostlayout/poster/png"
splt_json_files = ["train_tree.json", "dev_tree.json", "test_tree.json"]
# splt_json_files = ["test_tree.json"]

# Containers for the merged data
splt_all_images = {}
splt_all_annotations = []
splt_category_id_to_name = {}

# Load the JSON files and merge them
for fname in splt_json_files:
    path = os.path.join(splt_dataset_dir, fname)
    with open(path, "r") as f:
        data = json.load(f)
        # Image records
        for img in data.get("images", []):
            splt_all_images[img["id"]] = img
        # Annotation records
        splt_all_annotations.extend(data.get("annotations", []))
        # Category records (taken from the first file that provides any)
        if not splt_category_id_to_name:
            for cat in data.get("categories", []):
                splt_category_id_to_name[cat["id"]] = cat["name"]

# Exclude the "Root" and "Unknown" categories
splt_excluded_names = {"Root", "Unknown"}
splt_excluded_ids = {cat_id for cat_id, name in splt_category_id_to_name.items() if name in splt_excluded_names}
splt_filtered_annotations = [ann for ann in splt_all_annotations if ann["category_id"] not in splt_excluded_ids]

# Rebuild the set of images that are still referenced by an annotation
splt_used_image_ids = {ann["image_id"] for ann in splt_filtered_annotations}
splt_filtered_images = {img_id: splt_all_images[img_id] for img_id in splt_used_image_ids}

# --- Count bounding boxes per image ---
splt_image_id_to_count = Counter()
for ann in splt_filtered_annotations:
    splt_image_id_to_count[ann["image_id"]] += 1

splt_bbox_counts_per_image = list(splt_image_id_to_count.values())
splt_total_images = len(splt_filtered_images)
splt_total_annotations = len(splt_filtered_annotations)

# print(f"Total images (after excluding Root/Unknown): {splt_total_images}")
# print(f"Total annotations (bounding boxes): {splt_total_annotations}")
# print(f"Mean bounding boxes per image: {np.mean(splt_bbox_counts_per_image):.2f}")
# print(f"Std. of bounding boxes per image: {np.std(splt_bbox_counts_per_image):.2f}")

# --- Count bounding boxes per category ---
splt_category_counts = Counter()
for ann in splt_filtered_annotations:
    splt_category_counts[ann["category_id"]] += 1

# print("\nBounding boxes per category (Root/Unknown excluded):")
# for cat_id, count in splt_category_counts.most_common():
#     name = splt_category_id_to_name.get(cat_id, f"(id={cat_id})")
#     print(f"  {name:20s}: {count}")

# All image ids
splt_all_image_ids = set(splt_filtered_images.keys())

# Per-category, per-image counters, initialised to 0 for every poster
splt_cat_img_to_count = defaultdict(lambda: {img_id: 0 for img_id in splt_all_image_ids})

# Tally the annotations
for ann in splt_filtered_annotations:
    cat_id = ann["category_id"]
    img_id = ann["image_id"]
    splt_cat_img_to_count[cat_id][img_id] += 1

# Statistics output
# print("\nBounding boxes per category (mean / std over all posters):")
# for cat_id, img_counts in splt_cat_img_to_count.items():
#     name = splt_category_id_to_name.get(cat_id, f"(id={cat_id})")
#     values = list(img_counts.values())  # one value per poster (zeros included)
#     mean = np.mean(values)
#     std = np.std(values)
#     print(f"  {name:20s}: mean={mean:.2f}, std={std:.2f}")

In [None]:
# Build one tree per image.
# `splt_filtered_annotations` is rebound to a copy of *all* annotations, so the
# categories filtered out for the statistics above are included again.
splt_filtered_annotations = [ann for ann in splt_all_annotations]
splt_image_id_to_root = build_trees_per_image(splt_filtered_annotations)

print(f"構築された木の数: {len(splt_image_id_to_root)}")

# Inspect the tree of the first image in the mapping.
sample_id = next(iter(splt_image_id_to_root))
tree = splt_image_id_to_root[sample_id]
print(f"Root ID: {tree.id}, label: {tree.label}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Per-tree statistics
splt_depths = []
splt_widths = []
splt_image_ids = []

# (image_id, width) pairs, kept so widths can be traced back to images
splt_image_width_list = []

for image_id, root in splt_image_id_to_root.items():
    depth, width = compute_tree_depth_and_width(root)
    splt_depths.append(depth)
    splt_widths.append(width)
    splt_image_ids.append(image_id)
    splt_image_width_list.append((image_id, width))

# Statistics output
# print("Tree statistics (all images):")
# print(f"  mean tree depth: {np.mean(splt_depths):.2f}")
# print(f"  max tree depth: {np.max(splt_depths)}")
# print(f"  std. of tree depth: {np.std(splt_depths):.2f}")
# print(f"  mean tree width:   {np.mean(splt_widths):.2f}")
# print(f"  max tree width:   {np.max(splt_widths)}")
# print(f"  std. of tree width: {np.std(splt_widths):.2f}")

# Number of children of every node, over all trees
splt_children_counts = []

def splt_collect_children_counts(node):
    """Append the child count of ``node`` and of all its descendants to ``splt_children_counts``."""
    splt_children_counts.append(len(node.children))
    for child in node.children:
        splt_collect_children_counts(child)

# Collect over all trees
for root in splt_image_id_to_root.values():
    splt_collect_children_counts(root)

# Compute the statistics
splt_mean_children = np.mean(splt_children_counts)
splt_std_children = np.std(splt_children_counts)

# print(f"mean number of children: {splt_mean_children:.2f}")
# print(f"std. of number of children: {splt_std_children:.2f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# --- Tree Depth (used as is) ---
depth_counter = Counter(depths)
depth_x = sorted(depth_counter.keys())
depth_y = [depth_counter[d] for d in depth_x]

# --- Tree Width: values of 40 or more are merged into the single bin 40 ---
width_counter = Counter()
for w in widths:
    if w >= 40:
        width_counter[40] += 1
    else:
        width_counter[w] += 1
width_x = sorted(width_counter.keys())
width_y = [width_counter[x] for x in width_x]

# --- Children per Node: values of 40 or more merged into bin 40, log2-scaled counts ---
branch_counter = Counter()
for c in children_counts:
    if c >= 40:
        branch_counter[40] += 1
    else:
        branch_counter[c] += 1
branch_x = sorted(branch_counter.keys())
# log2 of the counts: a bin that occurs exactly once gets height 0.
branch_y = [np.log2(branch_counter[x]) for x in branch_x]

# --- Plot ---
fig, axes = plt.subplots(1, 3, figsize=(12, 1.8))

xlabel_fontsize = 20
ylabel_fontsize = 12
tick_fontsize = 20

# --- (a) Tree Depth ---
axes[0].bar(depth_x, depth_y, edgecolor="black")
axes[0].set_title("(a) Tree Depth", fontsize=20)
axes[0].set_xlabel("Depth", fontsize=xlabel_fontsize)
axes[0].set_ylabel("Frequency", fontsize=ylabel_fontsize)
axes[0].set_xticks(depth_x)
axes[0].grid(axis='y')

# --- (b) Tree Width ---
axes[1].bar(width_x, width_y, edgecolor="black")
axes[1].set_title("(b) Tree Width", fontsize=20)
axes[1].set_xlabel("Width", fontsize=xlabel_fontsize)
axes[1].set_ylabel("Frequency", fontsize=ylabel_fontsize)

# X tick labels: every 10, with the last one shown as "40+"
xticks_w = [0, 10, 20, 30, 40]
xtick_labels_w = ["0", "10", "20", "30", "40+"]
axes[1].set_xticks(xticks_w)
axes[1].set_xticklabels(xtick_labels_w)
axes[1].grid(axis='y')

# --- (c) Number of Children per Node ---
axes[2].bar(branch_x, branch_y, edgecolor="black")
axes[2].set_title("(c) Number of Children per Node", fontsize=18)
axes[2].set_xlabel("Number of children", fontsize=xlabel_fontsize)
axes[2].set_ylabel("Log-scale\nFrequency", fontsize=ylabel_fontsize)
axes[2].yaxis.set_label_coords(-0.15, 0.2)

# X tick labels: every 10, with the last one shown as "40+"
xticks_b = [0, 10, 20, 30, 40]
xtick_labels_b = ["0", "10", "20", "30", "40+"]
axes[2].set_xticks(xticks_b)
axes[2].set_xticklabels(xtick_labels_b)
axes[2].grid(axis='y')

# --- Tick label font size ---
for ax in axes:
    ax.tick_params(labelsize=tick_fontsize)

# --- Save and show ---
plt.tight_layout()
plt.savefig("./figures/dhn_tree_stats_summary.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/dhn_tree_stats_summary.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# === Sample counts used for normalisation ===
depth_total = len(depths)
splt_depth_total = len(splt_depths)
width_total = len(widths)
splt_width_total = len(splt_widths)
branch_total = len(children_counts)
splt_branch_total = len(splt_children_counts)

# === Tree Depth (frequency per 1000 images) ===
depth_counter = Counter(depths)
splt_depth_counter = Counter(splt_depths)
depth_x = sorted(set(depth_counter.keys()) | set(splt_depth_counter.keys()))
depth_y_splt = np.array([splt_depth_counter.get(x, 0) / splt_depth_total * 1000 for x in depth_x])
depth_y_all = np.array([depth_counter.get(x, 0) / depth_total * 1000 for x in depth_x])
# log2(value + 1) keeps empty bins at height 0.
depth_y_splt_log = np.log2(depth_y_splt + 1)
depth_y_all_log = np.log2(depth_y_all + 1)

# === Tree Width (40 or more merged into bin 40, frequency per 1000 images) ===
width_counter = Counter(40 if w >= 40 else w for w in widths)
splt_width_counter = Counter(40 if w >= 40 else w for w in splt_widths)
width_x = sorted(set(width_counter.keys()) | set(splt_width_counter.keys()))
width_y_splt = np.array([splt_width_counter.get(x, 0) / splt_width_total * 1000 for x in width_x])
width_y_all = np.array([width_counter.get(x, 0) / width_total * 1000 for x in width_x])
width_y_splt_log = np.log2(width_y_splt + 1)
width_y_all_log = np.log2(width_y_all + 1)

# === Children per Node (40 or more merged into bin 40, frequency per 1000 nodes, log scale) ===
branch_counter = Counter(40 if c >= 40 else c for c in children_counts)
splt_branch_counter = Counter(40 if c >= 40 else c for c in splt_children_counts)
branch_x = sorted(set(branch_counter.keys()) | set(splt_branch_counter.keys()))
branch_y_splt = np.array([splt_branch_counter.get(x, 0) / splt_branch_total * 1000 for x in branch_x])
branch_y_all = np.array([branch_counter.get(x, 0) / branch_total * 1000 for x in branch_x])
branch_y_splt_log = np.log2(branch_y_splt + 1)
branch_y_all_log = np.log2(branch_y_all + 1)

# === Plot ===
# In each panel the second dataset's log values are stacked on top of the first
# dataset's log values via `bottom=`.
# NOTE(review): the total height of a stacked bar is therefore a sum of two
# logarithms -- confirm this is the intended visual encoding.
fig, axes = plt.subplots(1, 3, figsize=(12, 1.5))
xlabel_fontsize = 14
ylabel_fontsize = 12
tick_fontsize = 14

# --- (a) Tree Depth ---
x_vals = np.array(depth_x)
axes[0].bar(x_vals, depth_y_splt_log, color="blue", edgecolor="black")
axes[0].bar(x_vals, depth_y_all_log, bottom=depth_y_splt_log, color="orange", edgecolor="black", hatch="//")
axes[0].set_title("(a) Tree Depth", fontsize=14)
axes[0].set_xlabel("Depth", fontsize=xlabel_fontsize)
axes[0].set_ylabel("Log-scale\nFrequency", fontsize=ylabel_fontsize)
axes[0].set_xticks(x_vals)
axes[0].grid(axis='y')

# --- (b) Tree Width ---
x_vals = np.array(width_x)
axes[1].bar(x_vals, width_y_splt_log, color="blue", edgecolor="black")
axes[1].bar(x_vals, width_y_all_log, bottom=width_y_splt_log, color="orange", edgecolor="black", hatch="//")
axes[1].set_title("(b) Tree Width", fontsize=14)
axes[1].set_xlabel("Width", fontsize=xlabel_fontsize)
axes[1].set_ylabel("Log-scale\nFrequency", fontsize=ylabel_fontsize)
xticks_w = [0, 10, 20, 30, 40]
xtick_labels_w = ["0", "10", "20", "30", "40+"]
axes[1].set_xticks(xticks_w)
axes[1].set_xticklabels(xtick_labels_w)
axes[1].grid(axis='y')

# --- (c) Number of Children per Node ---
x_vals = np.array(branch_x)
axes[2].bar(x_vals, branch_y_splt_log, color="blue", edgecolor="black")
axes[2].bar(x_vals, branch_y_all_log, bottom=branch_y_splt_log, color="orange", edgecolor="black", hatch="//")
axes[2].set_title("(c) Number of Children per Node", fontsize=14)
axes[2].set_xlabel("Number of children", fontsize=xlabel_fontsize)
axes[2].set_ylabel("Log-scale\nFrequency", fontsize=ylabel_fontsize)
# axes[2].yaxis.set_label_coords(-0.15, 0.2)
xticks_b = [0, 10, 20, 30, 40]
xtick_labels_b = ["0", "10", "20", "30", "40+"]
axes[2].set_xticks(xticks_b)
axes[2].set_xticklabels(xtick_labels_b)
axes[2].grid(axis='y')

# --- Settings shared by all panels ---
for ax in axes:
    ax.tick_params(labelsize=tick_fontsize)

# --- Save and show ---
plt.tight_layout()
plt.savefig("./figures/splt_dhn_tree_stats_summary.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/splt_dhn_tree_stats_summary.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
N = 30  # number of top entries to list

# Sort by width, largest first
top_images = sorted(image_width_list, key=lambda x: x[1], reverse=True)[:N]

print(f"\n幅が大きい上位 {N} 件の image_id:")
for i, (img_id, width) in enumerate(top_images, 1):
    print(f"{i:2d}: image_id={img_id}, width={width}")

In [None]:
# (category, next category) -> number of times the pair is adjacent in reading order.
reading_order_counter = Counter()

def dfs_all_nodes(node):
    """
    Return ``node`` and all of its descendants as a flat pre-order list.

    Siblings are visited in ascending ``priority`` order.
    """
    ordered = []
    stack = [node]
    while stack:
        current = stack.pop()
        ordered.append(current)
        by_priority = sorted(current.children, key=lambda c: c.priority)
        # Reverse so the lowest-priority-value child is popped first.
        stack.extend(reversed(by_priority))
    return ordered

# Walk the linearised node sequence of every tree and count adjacent category pairs.
for root in image_id_to_root.values():
    ordered_nodes = dfs_all_nodes(root)
    for current_node, next_node in zip(ordered_nodes, ordered_nodes[1:]):
        category_pair = (current_node.category, next_node.category)
        reading_order_counter[category_pair] += 1


In [None]:
import numpy as np
from math import atan2, degrees
from collections import defaultdict, Counter

# --- Bounding-box centre ---
def bbox_center(bbox):
    """Return the ``(x, y)`` centre of a four-element ``(x, y, width, height)`` box."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- Direction classification (eight 45-degree sectors) ---
def classify_8_directions(dx, dy):
    """
    Map a displacement ``(dx, dy)`` to one of eight direction labels.

    The angle of the vector is normalised to [0, 360) degrees and assigned to
    the 45-degree sector centred on 0, 45, ..., 315 degrees. The labels are
    右 (right), 右下 (bottom-right), 下 (bottom), 左下 (bottom-left),
    左 (left), 左上 (top-left), 上 (top) and 右上 (top-right); positive ``dy``
    maps to the "bottom" side. "不明" (unknown) is returned when the angle is
    not a number inside [0, 360).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not (0 <= angle < 360):
        return "不明"

    # (exclusive upper bound of the sector in degrees, label)
    sectors = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper_bound, direction in sectors:
        if angle < upper_bound:
            return direction
    # 337.5 <= angle < 360 wraps around to the first sector.
    return "右"

# --- Aggregation ---
# Overall direction counts, and the same counts broken down per image.
direction_counter = Counter()
image_direction_stats = defaultdict(Counter)

for image_id, root in image_id_to_root.items():
    ordered_nodes = dfs_all_nodes(root)
    # The Root node is skipped, so only the remaining nodes form the reading sequence.
    filtered_nodes = [n for n in ordered_nodes if n.category != "Root"]

    for i in range(len(filtered_nodes) - 1):
        n1, n2 = filtered_nodes[i], filtered_nodes[i + 1]
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centres have no direction; skip the pair.
        if dx == 0 and dy == 0:
            continue

        direction = classify_8_directions(dx, dy)
        direction_counter[direction] += 1
        image_direction_stats[image_id][direction] += 1

# --- Convert to a DataFrame ---
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
records = []
for image_id in sorted(image_direction_stats.keys()):
    row = {"image_id": image_id}
    counter = image_direction_stats[image_id]
    for d in direction_types:
        row[d] = counter[d]
    records.append(row)

df_direction = pd.DataFrame(records).fillna(0).set_index("image_id")

# --- Output ---
total_all = sum(direction_counter.values())
# NOTE(review): only images with at least one counted transition have a row in
# df_direction, so the per-image mean below is taken over those images only.
num_images = len(df_direction)

print("=== 視線移動方向の統計（45度刻み・8方向） ===")
for direction in direction_types:
    total_count = direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    counts = df_direction[direction].values
    mean_count = counts.sum() / num_images
    std_count = counts.std(ddof=0)

    print(f"{direction:4s}: Total={total_count:6d} ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
# Sort images by their number of top-left ("左上") transitions, largest first
df_direction_sorted = df_direction.sort_values(by="左上", ascending=False)

# Show the top 50 rows
print(df_direction_sorted.head(50)[["左上"]])


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# --- Direction labels and corresponding angles (in degrees) ---
direction_labels = ["Right", "Bottom-Right", "Bottom", "Bottom-Left",
                    "Left", "Top-Left", "Top", "Top-Right"]
direction_angles = [0, 45, 90, 135, 180, 225, 270, 315]

# --- Convert degrees to radians for matplotlib ---
theta = np.deg2rad(direction_angles)

# --- Frequencies for each direction ---
# The Japanese keys are listed in the same order as direction_labels above.
radii = [direction_counter[d_jp] for d_jp in ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]]

# --- Polar bar chart ---
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
bars = ax.bar(theta, radii, width=np.deg2rad(45), align='center', edgecolor='black')

# --- Polar plot settings ---
ax.set_theta_zero_location("E")  # 0° at the right
ax.set_theta_direction(-1)       # Clockwise

ax.set_xticks(theta)
ax.set_xticklabels(direction_labels, fontsize=12)

ax.set_yticklabels([])  # Hide radial labels (optional)
ax.set_title("Gaze Transition Frequency by Direction (8 Sectors)", fontsize=14)

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from math import sqrt
import numpy as np
from collections import defaultdict

# --- Bounding-box helpers ---
def bbox_center(bbox):
    """Return the centre point ``(cx, cy)`` of a box given as ``(x, y, width, height)``."""
    origin_x, origin_y, box_w, box_h = bbox
    half_w = box_w / 2
    half_h = box_h / 2
    return origin_x + half_w, origin_y + half_h

def bbox_diagonal(bbox):
    """Return the diagonal length of a box unpacked as ``(x, y, w, h)``."""
    _left, _top, width, height = bbox
    squared_length = width**2 + height**2
    return sqrt(squared_length)

# --- Settings ---
# Upper edges of the normalized-distance bins; the last bin is open-ended.
thresholds = [1, 2, 4, 8, 16]
bin_labels = [f"(0-{thresholds[0]}]"] + \
             [f"({thresholds[i-1]}-{thresholds[i]}]" for i in range(1, len(thresholds))] + \
             [f">{thresholds[-1]}"]

# --- Result storage: image_id -> list of normalized distances ---
image_id_to_dists = defaultdict(list)

# --- Walk the data: one distance per consecutive pair of non-Root nodes in DFS order ---
for image_id, root in image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]
    for i in range(len(nodes) - 1):
        n1, n2 = nodes[i], nodes[i + 1]
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)

        dx = abs(x2 - x1)
        dy = abs(y2 - y1)

        # Scale by the larger box extent along the dominant axis of the move
        if dx >= dy:
            scale = max(n1.bbox[2], n2.bbox[2])  # width
            dist = dx
        else:
            scale = max(n1.bbox[3], n2.bbox[3])  # height
            dist = dy

        # Pairs whose scale is not positive are dropped
        if scale > 0:
            normalized = dist / scale
            image_id_to_dists[image_id].append(normalized)

# --- Flatten the per-image lists into one array ---
all_values = [d for dists in image_id_to_dists.values() for d in dists]
values = np.array(all_values)

# --- Basic statistics ---
# NOTE(review): max()/min() on an empty array would raise — confirm the data
# always yields at least one pair.
print("\n=== 読み順における相対距離統計 ===")
print(f"平均倍率     : {values.mean():.3f}")
print(f"標準偏差     : {values.std():.3f}")
print(f"中央値       : {np.median(values):.3f}")
print(f"最大 / 最小 : {values.max():.3f} / {values.min():.3f}")

# --- Per-image occurrence counts for each bin ---
print("\n=== 区間ごとの画像あたり平均出現数 / 標準偏差 ===")
bin_image_counts = {label: [] for label in bin_labels}

starts = [0] + thresholds
ends = thresholds + [np.inf]

# Bins are half-open on the left, (s, e]; a distance of exactly 0 matches no bin.
# Only images that contributed at least one distance appear here.
for dists in image_id_to_dists.values():
    dists = np.array(dists)
    for label, s, e in zip(bin_labels, starts, ends):
        count = ((dists > s) & (dists <= e)).sum()
        bin_image_counts[label].append(count)

for label in bin_labels:
    arr = np.array(bin_image_counts[label])
    print(f"{label:>7} : 平均 = {arr.mean():5.2f} / std = {arr.std():5.2f}")

# --- Overall count and share per bin ---
print("\n=== 読み順距離の区間ごとの全体カウントと比率 ===")
start = 0
total = len(values)
for t in thresholds:
    count = ((values > start) & (values <= t)).sum()
    percent = count / total * 100
    print(f"{start:>2} < x ≤ {t:<2} : {count:6d} 組 | 比率: {percent:5.1f}%")
    start = t
overflow = (values > thresholds[-1]).sum()
percent = overflow / total * 100
print(f"x > {thresholds[-1]:<2}     : {overflow:6d} 組 | 比率: {percent:5.1f}%")

# --- Histogram ---
# NOTE(review): the last bin edge is np.inf and the bins have unequal widths —
# confirm the rendered bars look as intended.
plt.figure(figsize=(8, 5))
plt.hist(values, bins=[0] + thresholds + [np.inf], edgecolor='black')
plt.xlabel("Normalized distance between adjacent nodes in reading order")
plt.ylabel("Count")
plt.title("Histogram of normalized distances (DFS reading order)")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from collections import defaultdict

# Count reading-order moves by (direction, normalized-distance bin).
# Fixed row order of the heatmap; these labels must match the strings
# returned by classify_8_directions.
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_idx = {label: i for i, label in enumerate(direction_labels)}

# Distance-bin configuration
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0] + thresholds + [np.inf]  # 7 edges -> 6 bins
num_dirs = len(direction_labels)
num_bins = len(bin_edges) - 1

# Result: counts indexed by [direction, distance bin]
heatmap = np.zeros((num_dirs, num_bins), dtype=int)

for image_id, root in image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]
    # Consecutive non-Root nodes in DFS order form the reading-order pairs
    for n1, n2 in zip(nodes, nodes[1:]):
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centres have no direction; skip them
        if dx == 0 and dy == 0:
            continue

        # --- Direction ---
        direction = classify_8_directions(dx, dy)
        dir_idx = direction_to_idx.get(direction)
        if dir_idx is None:
            # Unclassifiable direction ("不明"): skip instead of raising KeyError
            continue

        # --- Relative distance, scaled by the box extent on the dominant axis ---
        if abs(dx) >= abs(dy):
            scale = max(n1.bbox[2], n2.bbox[2])  # width
            dist = abs(dx)
        else:
            scale = max(n1.bbox[3], n2.bbox[3])  # height
            dist = abs(dy)
        # A non-positive scale cannot normalize; a negative one would yield a
        # negative index that silently wraps to the last bin.
        if scale <= 0:
            continue
        norm_dist = dist / scale

        # --- Distance bin ---
        # right=True selects bin_edges[k] < x <= bin_edges[k + 1], so a value
        # exactly on a threshold stays in the "(a, b]" bin the labels describe.
        bin_idx = int(np.digitize(norm_dist, bin_edges, right=True)) - 1
        bin_idx = min(max(bin_idx, 0), num_bins - 1)  # safety clamp

        # --- Count ---
        heatmap[dir_idx, bin_idx] += 1

In [None]:
# Render the (direction x distance-bin) count matrix `heatmap` as a LaTeX table.
# English row labels, in the same order as the heatmap rows.
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence and
# triggers a SyntaxWarning on recent Python versions. The text is unchanged.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Table preamble and header row
latex = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\begin{tabular}{l" + "r" * heatmap.shape[1] + "}",
    "\\toprule",
    "Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\",
    "\\midrule",
]

# One row per direction, counts with thousands separators
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v:,}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

# Table footer
latex += [
    "\\bottomrule",
    "\\end{tabular}",
    "\\caption{Reading Order frequency by direction and normalized distance bin.}",
    "\\label{tab:reading_order_direction_distance}",
    "\\end{table}",
]

# Print the finished table
print("\n".join(latex))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------------------
# Input: heatmap (directions x distance bins)
# Direction order: ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
# ----------------------------------------
# Colour value is log2(count per 1000 images + 1).
# Earlier variant kept for reference:
# log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) + 1)  # shape: (8, 6)
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# ----------------------------------------
# Enlarge the data (no interpolation, plain repetition)
# ----------------------------------------
repeat_factor = 17  # each cell is repeated 17 times along both axes (tunable)
Z = log_heatmap.T  # rows: distance bin, cols: direction
Z_repeat = np.repeat(np.repeat(Z, repeat_factor, axis=0), repeat_factor, axis=1)  # (102, 136) for an (8, 6) heatmap
# shift evaluates to -8: together with the half-cell offset of theta_edges
# below, each direction's 17-column band ends up centred on its tick angle.
Z_repeat = np.roll(Z_repeat, shift=-repeat_factor//2+1, axis=1)

# ----------------------------------------
# Polar grid
# ----------------------------------------
theta_edges = np.linspace(0, 2 * np.pi, Z_repeat.shape[1] + 1) - (np.pi / Z_repeat.shape[1])
r_edges = np.linspace(0, 6, Z_repeat.shape[0] + 1)  # radius runs over bin indices 0..6
# r_edges = np.logspace(1e-10, np.log2(2**6), Z_repeat.shape[0] + 1, base=2)

Theta, R = np.meshgrid(theta_edges, r_edges)

# ----------------------------------------
# Drawing
# ----------------------------------------
fig, ax = plt.subplots(subplot_kw=dict(polar=True), figsize=(8, 8))
ax.set_theta_direction(-1)  # draw clockwise
ax.set_theta_offset(0)      # theta=0 points right (3 o'clock)

# NOTE(review): vmax=13 is a fixed colour ceiling — confirm it covers the data range.
pcm = ax.pcolormesh(Theta, R, Z_repeat, cmap='Blues', shading='auto', vmin=0, vmax=13)

# ----------------------------------------
# Direction labels with the per-image mean appended
# ----------------------------------------
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]

# Raw (non-log) mean per image: row sums over distance bins / number of images
dir_means_real = heatmap.sum(axis=1) / float(len(image_id_to_root))

# Append the mean to each label (two decimal places)
direction_labels_with_mean = [
    f"{label}\n{mean:.2f}" for label, mean in zip(direction_labels, dir_means_real)
]

tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax.set_xticks(tick_angles)
ax.set_xticklabels(direction_labels_with_mean, fontsize=28)

# Radial axis: tick k is labelled with the k-th distance-bin edge
ax.set_yticks(np.arange(0, 7))
tick_texts = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
tick_texts[-1].set_fontsize(36)

# ----------------------------------------
# Colour bar
# ----------------------------------------
cbar = plt.colorbar(pcm, ax=ax, pad=0.12, shrink=0.7)
cbar.ax.tick_params(labelsize=28)
# cbar.set_label("log₂(count + 1)", fontsize=12)

# ----------------------------------------
# Save and show
# ----------------------------------------
ax.grid(False)
plt.tight_layout()
plt.savefig("./figures/dhn_reading_order_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/dhn_reading_order_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import numpy as np
from collections import defaultdict

# Count reading-order moves by (direction, normalized-distance bin) for the
# trees in splt_image_id_to_root; otherwise the same procedure as for `heatmap`.
# Fixed row order of the heatmap
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_idx = {label: i for i, label in enumerate(direction_labels)}

# Distance-bin configuration
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0] + thresholds + [np.inf]
num_dirs = len(direction_labels)
num_bins = len(bin_edges) - 1

# Result: counts indexed by [direction, distance bin]
splt_heatmap = np.zeros((num_dirs, num_bins), dtype=int)

for splt_image_id, splt_root in splt_image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(splt_root) if n.category != "Root"]
    # Consecutive non-Root nodes in DFS order form the reading-order pairs
    for n1, n2 in zip(nodes, nodes[1:]):
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centres have no direction; skip them
        if dx == 0 and dy == 0:
            continue

        direction = classify_8_directions(dx, dy)
        dir_idx = direction_to_idx.get(direction)
        if dir_idx is None:
            # Unclassifiable direction ("不明"): skip instead of raising KeyError
            continue

        # Scale by the larger box extent along the dominant axis of the move
        if abs(dx) >= abs(dy):
            scale = max(n1.bbox[2], n2.bbox[2])
            dist = abs(dx)
        else:
            scale = max(n1.bbox[3], n2.bbox[3])
            dist = abs(dy)

        # A non-positive scale cannot normalize; a negative one would yield a
        # negative index that silently wraps to the last bin.
        if scale <= 0:
            continue
        norm_dist = dist / scale

        # right=True selects bin_edges[k] < x <= bin_edges[k + 1], so a value
        # exactly on a threshold stays in the "(a, b]" bin the labels describe.
        bin_idx = int(np.digitize(norm_dist, bin_edges, right=True)) - 1
        bin_idx = min(max(bin_idx, 0), num_bins - 1)  # safety clamp

        splt_heatmap[dir_idx, bin_idx] += 1


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec

# Normalize to counts per 1000 images, then apply log2(x + 1)
log_splt_heatmap = np.log2(splt_heatmap / float(len(splt_image_id_to_root)) * 1000 + 1)
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# Enlarge the matrix so every (distance, direction) cell spans several grid cells
def prepare_rose_matrix(log_map):
    """Transpose ``log_map``, repeat each cell 17x on both axes and rotate the columns.

    The column rotation (8 columns to the left) is what lets a caller centre
    each direction band on its tick angle.
    """
    repeat_factor = 17
    rows_expanded = np.repeat(log_map.T, repeat_factor, axis=0)
    expanded = np.repeat(rows_expanded, repeat_factor, axis=1)
    return np.roll(expanded, shift=-repeat_factor // 2 + 1, axis=1)

# Side-by-side rose plots: Z1 comes from splt_heatmap (left), Z2 from heatmap (right).
Z1 = prepare_rose_matrix(log_splt_heatmap)
Z2 = prepare_rose_matrix(log_heatmap)

# Polar grid; the half-cell offset centres each grid cell on its angle
theta_edges = np.linspace(0, 2 * np.pi, Z1.shape[1] + 1) - (np.pi / Z1.shape[1])
r_edges = np.linspace(0, 6, Z1.shape[0] + 1)
Theta, R = np.meshgrid(theta_edges, r_edges)

# Direction labels, later combined with the per-image means
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)

# Raw (non-log) mean count per image for each direction
splt_dir_means = splt_heatmap.sum(axis=1) / float(len(splt_image_id_to_root))
doc_dir_means = heatmap.sum(axis=1) / float(len(image_id_to_root))

splt_labels = [f"{label}\n{val:.2f}" for label, val in zip(direction_labels, splt_dir_means)]
doc_labels = [f"{label}\n{val:.2f}" for label, val in zip(direction_labels, doc_dir_means)]

# --- Figure & GridSpec ---
fig = plt.figure(figsize=(16, 8))
gs = gridspec.GridSpec(1, 3, width_ratios=[1, 0.05, 1], wspace=0.3)  # wspace leaves room in the middle

# Left: SciPostLayoutTree
ax_left = fig.add_subplot(gs[0, 0], polar=True)
ax_left.set_theta_direction(-1)
ax_left.set_theta_offset(0)
ax_left.grid(False)

# Right: DocHieNet
ax_right = fig.add_subplot(gs[0, 2], polar=True)
ax_right.set_theta_direction(-1)
ax_right.set_theta_offset(0)
ax_right.grid(False)

# Draw both with the same colormap and limits so one colour bar serves both
pcm1 = ax_left.pcolormesh(Theta, R, Z1, cmap='Blues', shading='auto', vmin=0, vmax=13)
pcm2 = ax_right.pcolormesh(Theta, R, Z2, cmap='Blues', shading='auto', vmin=0, vmax=13)

# Direction tick labels
tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax_left.set_xticks(tick_angles)
ax_left.set_xticklabels(splt_labels, fontsize=28)
ax_right.set_xticks(tick_angles)
ax_right.set_xticklabels(doc_labels, fontsize=28)

# Radial tick labels: tick k is labelled with the k-th distance-bin edge
for ax in [ax_left, ax_right]:
    ax.set_yticks(np.arange(0, 7))
    ticks = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
    ticks[-1].set_fontsize(36)

# Shared colour bar in its own narrow middle column
cax = fig.add_subplot(gs[0, 1])  # the narrow centre column
cbar = fig.colorbar(pcm1, cax=cax)
cbar.ax.tick_params(labelsize=28)

# Save and show
plt.savefig("./figures/splt_dhn_reading_order_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/splt_dhn_reading_order_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
from scipy.stats import norm
import numpy as np

# Two-sided z-test on the difference of per-page rates (Poisson assumption),
# run independently for every (direction, distance-bin) cell.
# NOTE(review): no multiple-comparison correction is applied across cells.
poster_pages = len(splt_image_id_to_root)
doc_pages = len(image_id_to_root)

directions, distances = splt_heatmap.shape  # (directions, distance bins)
p_values = np.full((directions, distances), np.nan)
significant = np.zeros((directions, distances), dtype=bool)

for i in range(directions):      # i = direction
    for j in range(distances):   # j = distance bin
        a1 = splt_heatmap[i, j]
        a2 = heatmap[i, j]
        n1 = poster_pages
        n2 = doc_pages

        # Per-page rates and the standard error of their difference
        r1 = a1 / n1
        r2 = a2 / n2
        se = np.sqrt(r1 / n1 + r2 / n2)

        if se == 0:
            # Both counts are zero: nothing to distinguish
            p = 1.0 if r1 == r2 else 0.0
        else:
            z = (r1 - r2) / se
            # The survival function avoids the cancellation in 1 - cdf, which
            # collapses to exactly 0 for large |z|.
            p = 2 * norm.sf(abs(z))

        p_values[i, j] = p
        significant[i, j] = p < 0.05

direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
distance_labels = ["(0,1]", "(1,2]", "(2,4]", "(4,8]", "(8,16]", "(16,∞)"]

# List only the cells where the difference is NOT significant
print("\n=== ページあたり出現率に有意差なし (p ≥ 0.05, Poisson z-test) ===")
for i in range(directions):
    for j in range(distances):
        if not significant[i, j]:
            direc = direction_labels[i]
            dist = distance_labels[j]
            print(f"{direc} × {dist}: p = {p_values[i, j]:.4f}")

In [None]:
from scipy.stats import norm
import numpy as np

# Two-sided z-test on the difference of per-page rates (Poisson assumption),
# one test per direction with counts summed over all distance bins.
poster_pages = len(splt_image_id_to_root)
doc_pages = len(image_id_to_root)

rows = splt_heatmap.shape[0]  # directions
print(rows)
p_values = np.full(rows, np.nan)
significant = np.zeros(rows, dtype=bool)

for i in range(rows):
    a1 = splt_heatmap[i, :].sum()
    a2 = heatmap[i, :].sum()

    n1 = poster_pages
    n2 = doc_pages

    r1 = a1 / n1
    r2 = a2 / n2

    # Standard error of the difference of two Poisson rates
    se = np.sqrt(r1 / n1 + r2 / n2)

    if se == 0:
        # Both counts are zero: nothing to distinguish
        p = 1.0 if r1 == r2 else 0.0
    else:
        z = (r1 - r2) / se
        # The survival function avoids the cancellation in 1 - cdf, which
        # collapses to exactly 0 for large |z|.
        p = 2 * norm.sf(abs(z))

    p_values[i] = p
    significant[i] = p < 0.05

direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]

# List only the directions where the difference is NOT significant
print("\n=== 方向ごとのページあたり出現率に有意差なし (p ≥ 0.05) ===")
for i in range(rows):
    if not significant[i]:
        print(f"{direction_labels[i]}: p = {p_values[i]:.4f}")


In [None]:
from collections import defaultdict, Counter
from math import atan2, degrees
import pandas as pd

# --- bbox 中心 ---
def bbox_center(bbox):
    """Return the centre ``(cx, cy)`` of a box unpacked as ``(x, y, w, h)``."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- 方向分類（8方向） ---
def classify_8_directions(dx, dy):
    """Map a displacement to one of eight 45-degree sectors.

    The angle of ``(dx, dy)`` is taken with ``atan2(dy, dx)`` and folded into
    [0, 360); positive ``dy`` maps to the "下" (down) sectors. Returns the
    sector label, or "不明" when the angle falls outside [0, 360) (e.g. NaN).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not (0 <= angle < 360):
        return "不明"

    # (exclusive upper bound in degrees, label), checked in ascending order
    sector_upper_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper, label in sector_upper_bounds:
        if angle < upper:
            return label
    # Remaining range 337.5 <= angle < 360 wraps around to "右"
    return "右"

# --- Direction categories (fixed order) ---
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]

# --- Accumulators: overall counts and per-image counts ---
parent_child_direction_counter = Counter()
image_parent_child_direction_stats = defaultdict(Counter)

# --- Count the direction of every parent -> child edge ---
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        # Edges that start at the Root node are not counted
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = x2 - x1
            dy = y2 - y1

            # NOTE(review): coincident centres (dx == dy == 0) are not skipped
            # here and are classified like any other pair.
            direction = classify_8_directions(dx, dy)
            if direction == "不明":
                continue

            parent_child_direction_counter[direction] += 1
            image_parent_child_direction_stats[image_id][direction] += 1

# --- Build a DataFrame: one row per image, one column per direction ---
records = []
for image_id in sorted(image_parent_child_direction_stats.keys()):
    row = {"image_id": image_id}
    for direction in direction_types:
        row[direction] = image_parent_child_direction_stats[image_id][direction]
    records.append(row)

df_parent_child_dir = pd.DataFrame(records).set_index("image_id")

# --- Output ---
# NOTE(review): only images with at least one counted edge have a row, so the
# per-image mean/std below exclude images without any — confirm this is intended.
total_all = sum(parent_child_direction_counter.values())
print("=== 親子関係における視線移動方向（8方向）の統計 ===")
for direction in direction_types:
    total_count = parent_child_direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    counts = df_parent_child_dir[direction].values
    mean_count = counts.mean()
    std_count = counts.std(ddof=0)

    print(f"{direction:4s}: "
          f"Total={total_count:6d} 回 ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
import matplotlib.pyplot as plt
from math import sqrt
import numpy as np
from collections import defaultdict

# --- bbox helper ---
def bbox_center(bbox):
    """Return the centre ``(cx, cy)`` of a box unpacked as ``(x, y, w, h)``."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

def bbox_diagonal(bbox):
    """Return the diagonal length of a box unpacked as ``(x, y, w, h)``."""
    _left, _top, width, height = bbox
    squared_length = width**2 + height**2
    return sqrt(squared_length)

# --- Settings ---
# Upper edges of the normalized-distance bins; the last bin is open-ended.
thresholds = [1, 2, 4, 8, 16]
bin_labels = [f"(0-{thresholds[0]}]"] + \
             [f"({thresholds[i-1]}-{thresholds[i]}]" for i in range(1, len(thresholds))] + \
             [f">{thresholds[-1]}"]

# --- Data collection: all distances, and the same distances grouped by image ---
normalized_distances = []
image_id_to_dists = defaultdict(list)

for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        # Edges that start at the Root node are not measured
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = abs(x2 - x1)
            dy = abs(y2 - y1)

            # Scale by the larger box extent along the dominant axis
            if dx >= dy:
                scale = max(parent.bbox[2], child.bbox[2])  # bbox width
                dist = dx
            else:
                scale = max(parent.bbox[3], child.bbox[3])  # bbox height
                dist = dy

            # Pairs whose scale is not positive are dropped
            if scale > 0:
                normalized = dist / scale
                normalized_distances.append(normalized)
                image_id_to_dists[image_id].append(normalized)

# --- Overall statistics ---
values = np.array(normalized_distances)
print("\n=== 親子関係における相対距離統計 ===")
print(f"平均倍率     : {values.mean():.3f}")
print(f"標準偏差     : {values.std():.3f}")
print(f"中央値       : {np.median(values):.3f}")
print(f"最大 / 最小 : {values.max():.3f} / {values.min():.3f}")

# --- Overall count and share per bin ---
print("\n=== 親子距離の区間ごとのカウントと比率 ===")
start = 0  # not read again within this block
total = len(values)
starts = [0] + thresholds
ends = thresholds + [np.inf]
# Bins are half-open on the left, (s, e]; a distance of exactly 0 matches no bin
for s, e in zip(starts, ends):
    count = ((values > s) & (values <= e)).sum()
    percent = count / total * 100
    label = f"{s} < x ≤ {e}" if e != np.inf else f"x > {s}"
    print(f"{label:<13} : {count:6d} 組 | 比率: {percent:5.1f}%")

# --- Per-image mean / standard deviation of the count in each bin ---
print("\n=== 区間ごとの画像あたり平均出現数 / 標準偏差 ===")
bin_image_counts = {label: [] for label in bin_labels}

# Only images that contributed at least one distance appear here
for dists in image_id_to_dists.values():
    dists = np.array(dists)
    for label, s, e in zip(bin_labels, starts, ends):
        count = ((dists > s) & (dists <= e)).sum()
        bin_image_counts[label].append(count)

for label in bin_labels:
    arr = np.array(bin_image_counts[label])
    print(f"{label:>7} : 平均 = {arr.mean():5.2f} / std = {arr.std():5.2f}")

# --- Histogram ---
# NOTE(review): the last bin edge is np.inf and the bins have unequal widths —
# confirm the rendered bars look as intended.
plt.figure(figsize=(8, 5))
plt.hist(values, bins=[0] + thresholds + [np.inf], edgecolor='black')
plt.xlabel("Normalized parent-child distance (× bbox size)")
plt.ylabel("Count")
plt.title("Histogram of normalized distances between parent and child nodes")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from collections import defaultdict, Counter
from math import atan2, degrees, sqrt
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable

# --- bbox 中心 ---
def bbox_center(bbox):
    """Return the centre ``(cx, cy)`` of a box unpacked as ``(x, y, w, h)``."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- 方向分類（8方向） ---
def classify_8_directions(dx, dy):
    """Map a displacement to one of eight 45-degree sectors.

    The angle of ``(dx, dy)`` is taken with ``atan2(dy, dx)`` and folded into
    [0, 360); positive ``dy`` maps to the "下" (down) sectors. Returns the
    sector label, or "不明" when the angle falls outside [0, 360) (e.g. NaN).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not (0 <= angle < 360):
        return "不明"

    # (exclusive upper bound in degrees, label), checked in ascending order
    sector_upper_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper, label in sector_upper_bounds:
        if angle < upper:
            return label
    # Remaining range 337.5 <= angle < 360 wraps around to "右"
    return "右"

# --- Definitions ---
# Row order of the heatmap; labels match the strings from classify_8_directions.
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_index = {d: i for i, d in enumerate(direction_labels)}
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0] + thresholds + [float('inf')]
bin_count = len(bin_edges) - 1

# --- Count matrix (direction x distance bin) ---
# NOTE: this rebinds the notebook-global `heatmap` to parent-child counts.
heatmap = np.zeros((8, bin_count), dtype=int)

# --- Count every parent -> child edge ---
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        # Edges that start at the Root node are not counted
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx, dy = x2 - x1, y2 - y1
            # Identical centres have no direction; skip them
            if dx == 0 and dy == 0:
                continue

            direction = classify_8_directions(dx, dy)
            if direction == "不明":
                continue
            dir_idx = direction_to_index[direction]

            # Normalized distance along the dominant axis
            dx_abs, dy_abs = abs(dx), abs(dy)
            if dx_abs >= dy_abs:
                scale = max(parent.bbox[2], child.bbox[2])  # width
                dist = dx_abs
            else:
                scale = max(parent.bbox[3], child.bbox[3])  # height
                dist = dy_abs

            # A non-positive scale cannot normalize; a negative one would yield
            # a negative index that silently wraps to the last bin.
            if scale <= 0:
                continue
            norm_dist = dist / scale

            # right=True selects bin_edges[k] < x <= bin_edges[k + 1], so a
            # value exactly on a threshold stays in the "(a, b]" bin the
            # table labels describe.
            bin_idx = int(np.digitize(norm_dist, bin_edges, right=True)) - 1
            bin_idx = min(max(bin_idx, 0), bin_count - 1)  # safety clamp
            heatmap[dir_idx, bin_idx] += 1

In [None]:
# Render the (direction x distance-bin) count matrix `heatmap` as a LaTeX table.
# English row labels, in the same order as the heatmap rows.
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence and
# triggers a SyntaxWarning on recent Python versions. The text is unchanged.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Table preamble and header row
latex = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\begin{tabular}{l" + "r" * heatmap.shape[1] + "}",
    "\\toprule",
    "Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\",
    "\\midrule",
]

# One row per direction, counts with thousands separators
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v:,}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

# Table footer
latex += [
    "\\bottomrule",
    "\\end{tabular}",
    "\\caption{Parent-child frequency by direction and normalized distance bin.}",
    "\\label{tab:parent_child_direction_distance}",
    "\\end{table}",
]

# Print the finished table
print("\n".join(latex))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------------------
# Input: heatmap (directions x distance bins)
# Direction order: ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
# ----------------------------------------
# Colour value is log2(count per 1000 images + 1).
# Earlier variant kept for reference:
# log_heatmap = np.log2(heatmap + 1)  # shape: (8, 6)
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# ----------------------------------------
# Enlarge the data (no interpolation, plain repetition)
# ----------------------------------------
repeat_factor = 17  # each cell is repeated 17 times along both axes (tunable)
Z = log_heatmap.T  # rows: distance bin, cols: direction
Z_repeat = np.repeat(np.repeat(Z, repeat_factor, axis=0), repeat_factor, axis=1)  # (102, 136) for an (8, 6) heatmap
# shift evaluates to -8: together with the half-cell offset of theta_edges
# below, each direction's 17-column band ends up centred on its tick angle.
Z_repeat = np.roll(Z_repeat, shift=-repeat_factor//2+1, axis=1)

# ----------------------------------------
# Polar grid
# ----------------------------------------
theta_edges = np.linspace(0, 2 * np.pi, Z_repeat.shape[1] + 1) - (np.pi / Z_repeat.shape[1])
r_edges = np.linspace(0, 6, Z_repeat.shape[0] + 1)  # radius runs over bin indices 0..6
# r_edges = np.logspace(1e-10, np.log2(2**6), Z_repeat.shape[0] + 1, base=2)

Theta, R = np.meshgrid(theta_edges, r_edges)

# ----------------------------------------
# Drawing
# ----------------------------------------
fig, ax = plt.subplots(subplot_kw=dict(polar=True), figsize=(8, 8))
ax.set_theta_direction(-1)  # draw clockwise
ax.set_theta_offset(0)      # theta=0 points right (3 o'clock)

# NOTE(review): vmax=12 is a fixed colour ceiling — confirm it covers the data range.
pcm = ax.pcolormesh(Theta, R, Z_repeat, cmap='Blues', shading='auto', vmin=0, vmax=12)

# ----------------------------------------
# Direction labels with the per-image mean appended
# ----------------------------------------
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]

# Raw (non-log) mean per image: row sums over distance bins / number of images
dir_means_real = heatmap.sum(axis=1) / float(len(image_id_to_root))

# Append the mean to each label (two decimal places)
direction_labels_with_mean = [
    f"{label}\n{mean:.2f}" for label, mean in zip(direction_labels, dir_means_real)
]

tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax.set_xticks(tick_angles)
ax.set_xticklabels(direction_labels_with_mean, fontsize=28)

# Radial axis: tick k is labelled with the k-th distance-bin edge
ax.set_yticks(np.arange(0, 7))
tick_texts = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
tick_texts[-1].set_fontsize(36)

# ----------------------------------------
# Colour bar
# ----------------------------------------
cbar = plt.colorbar(pcm, ax=ax, pad=0.12, shrink=0.7)
cbar.ax.tick_params(labelsize=28)
# cbar.set_label("log₂(count + 1)", fontsize=12)

# ----------------------------------------
# Save and show
# ----------------------------------------
ax.grid(False)
plt.tight_layout()
plt.savefig("./figures/dhn_parent_child_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/dhn_parent_child_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import numpy as np
from math import atan2, degrees

# --- bbox 中心 ---
def splt_bbox_center(bbox):
    """Return the centre ``(cx, cy)`` of a box unpacked as ``(x, y, w, h)``."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- 方向分類（8方向） ---
def splt_classify_8_directions(dx, dy):
    """Map a displacement to one of eight 45-degree sectors.

    The angle of ``(dx, dy)`` is taken with ``atan2(dy, dx)`` and folded into
    [0, 360); positive ``dy`` maps to the "下" (down) sectors. Returns the
    sector label, or "不明" when the angle falls outside [0, 360) (e.g. NaN).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not (0 <= angle < 360):
        return "不明"

    # (exclusive upper bound in degrees, label), checked in ascending order
    sector_upper_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper, label in sector_upper_bounds:
        if angle < upper:
            return label
    # Remaining range 337.5 <= angle < 360 wraps around to "右"
    return "右"

# --- Definitions ---
# Row order of the heatmap; labels match the strings from splt_classify_8_directions.
splt_direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
splt_direction_to_index = {d: i for i, d in enumerate(splt_direction_labels)}
splt_thresholds = [1, 2, 4, 8, 16]
splt_bin_edges = [0] + splt_thresholds + [float('inf')]
splt_bin_count = len(splt_bin_edges) - 1

# --- Count matrix (direction x distance bin) ---
# NOTE: this rebinds the notebook-global `splt_heatmap` to parent-child counts.
splt_heatmap = np.zeros((8, splt_bin_count), dtype=int)

# --- Only images listed in splt_filtered_images are processed ---
# NOTE(review): if splt_filtered_images is a strict subset of
# splt_image_id_to_root, any per-image normalization of splt_heatmap by
# len(splt_image_id_to_root) uses too large a denominator — verify.
for splt_image_id in splt_filtered_images.keys():
    splt_root = splt_image_id_to_root[splt_image_id]
    splt_nodes = dfs_all_nodes(splt_root)

    for splt_parent in splt_nodes:
        # Edges that start at the Root node are not counted
        if splt_parent.category == "Root":
            continue
        for splt_child in splt_parent.children:
            if splt_child.category == "Root":
                continue

            x1, y1 = splt_bbox_center(splt_parent.bbox)
            x2, y2 = splt_bbox_center(splt_child.bbox)
            splt_dx, splt_dy = x2 - x1, y2 - y1
            # Identical centres have no direction; skip them
            if splt_dx == 0 and splt_dy == 0:
                continue

            splt_direction = splt_classify_8_directions(splt_dx, splt_dy)
            if splt_direction == "不明":
                continue
            splt_dir_idx = splt_direction_to_index[splt_direction]

            # Normalized distance along the dominant axis
            dx_abs, dy_abs = abs(splt_dx), abs(splt_dy)
            if dx_abs >= dy_abs:
                splt_scale = max(splt_parent.bbox[2], splt_child.bbox[2])  # width
                splt_dist = dx_abs
            else:
                splt_scale = max(splt_parent.bbox[3], splt_child.bbox[3])  # height
                splt_dist = dy_abs

            # A non-positive scale cannot normalize; a negative one would yield
            # a negative index that silently wraps to the last bin.
            if splt_scale <= 0:
                continue
            splt_norm_dist = splt_dist / splt_scale

            # right=True selects edges[k] < x <= edges[k + 1], so a value
            # exactly on a threshold stays in the "(a, b]" bin the table
            # labels describe.
            splt_bin_idx = int(np.digitize(splt_norm_dist, splt_bin_edges, right=True)) - 1
            splt_bin_idx = min(max(splt_bin_idx, 0), splt_bin_count - 1)  # safety clamp
            splt_heatmap[splt_dir_idx, splt_bin_idx] += 1


In [None]:
# Render the (direction x distance-bin) count matrix `splt_heatmap` as a LaTeX table.
# English row labels, in the same order as the heatmap rows.
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence and
# triggers a SyntaxWarning on recent Python versions. The text is unchanged.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Table preamble and header row
latex = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\begin{tabular}{l" + "r" * splt_heatmap.shape[1] + "}",
    "\\toprule",
    "Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\",
    "\\midrule",
]

# One row per direction, counts with thousands separators
for label, row in zip(direction_labels, splt_heatmap):
    row_str = " & ".join(f"{v:,}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

# Table footer
# NOTE(review): caption and \label are identical to those of the table built
# from `heatmap`; two such tables in one document would clash — confirm.
latex += [
    "\\bottomrule",
    "\\end{tabular}",
    "\\caption{Parent-child frequency by direction and normalized distance bin.}",
    "\\label{tab:parent_child_direction_distance}",
    "\\end{table}",
]

# Print the finished table
print("\n".join(latex))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec

# Normalize to counts per 1000 images, then apply log2(x + 1)
# NOTE(review): confirm len(splt_image_id_to_root) equals the number of images
# that were actually counted into splt_heatmap.
log_splt_heatmap = np.log2(splt_heatmap / float(len(splt_image_id_to_root)) * 1000 + 1)
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# Enlarge the matrix so every (distance, direction) cell spans several grid cells
def prepare_rose_matrix(log_map):
    """Transpose ``log_map``, repeat each cell 17x on both axes and rotate the columns.

    The column rotation (8 columns to the left) is what lets a caller centre
    each direction band on its tick angle.
    """
    repeat_factor = 17
    rows_expanded = np.repeat(log_map.T, repeat_factor, axis=0)
    expanded = np.repeat(rows_expanded, repeat_factor, axis=1)
    return np.roll(expanded, shift=-repeat_factor // 2 + 1, axis=1)

# Side-by-side rose plots: Z1 comes from splt_heatmap (left), Z2 from heatmap (right).
Z1 = prepare_rose_matrix(log_splt_heatmap)
Z2 = prepare_rose_matrix(log_heatmap)

# Polar grid; the half-cell offset centres each grid cell on its angle
theta_edges = np.linspace(0, 2 * np.pi, Z1.shape[1] + 1) - (np.pi / Z1.shape[1])
r_edges = np.linspace(0, 6, Z1.shape[0] + 1)
Theta, R = np.meshgrid(theta_edges, r_edges)

# Direction labels, later combined with the per-image means
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)

# Raw (non-log) mean count per image for each direction
# NOTE(review): confirm len(splt_image_id_to_root) equals the number of images
# that were actually counted into splt_heatmap.
splt_dir_means = splt_heatmap.sum(axis=1) / float(len(splt_image_id_to_root))
doc_dir_means = heatmap.sum(axis=1) / float(len(image_id_to_root))

splt_labels = [f"{label}\n{val:.2f}" for label, val in zip(direction_labels, splt_dir_means)]
doc_labels = [f"{label}\n{val:.2f}" for label, val in zip(direction_labels, doc_dir_means)]

# --- Figure & GridSpec ---
fig = plt.figure(figsize=(16, 8))
gs = gridspec.GridSpec(1, 3, width_ratios=[1, 0.05, 1], wspace=0.3)  # wspace leaves room in the middle

# Left: SciPostLayoutTree
ax_left = fig.add_subplot(gs[0, 0], polar=True)
ax_left.set_theta_direction(-1)
ax_left.set_theta_offset(0)
ax_left.grid(False)

# Right: DocHieNet
ax_right = fig.add_subplot(gs[0, 2], polar=True)
ax_right.set_theta_direction(-1)
ax_right.set_theta_offset(0)
ax_right.grid(False)

# Draw both with the same colormap and limits so one colour bar serves both
pcm1 = ax_left.pcolormesh(Theta, R, Z1, cmap='Blues', shading='auto', vmin=0, vmax=12)
pcm2 = ax_right.pcolormesh(Theta, R, Z2, cmap='Blues', shading='auto', vmin=0, vmax=12)

# Direction tick labels
tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax_left.set_xticks(tick_angles)
ax_left.set_xticklabels(splt_labels, fontsize=28)
ax_right.set_xticks(tick_angles)
ax_right.set_xticklabels(doc_labels, fontsize=28)

# Radial tick labels: tick k is labelled with the k-th distance-bin edge
for ax in [ax_left, ax_right]:
    ax.set_yticks(np.arange(0, 7))
    ticks = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
    ticks[-1].set_fontsize(36)

# Shared colour bar in its own narrow middle column
cax = fig.add_subplot(gs[0, 1])  # the narrow centre column
cbar = fig.colorbar(pcm1, cax=cax)
cbar.ax.tick_params(labelsize=28)

# Save and show
plt.savefig("./figures/splt_dhn_parent_child_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/splt_dhn_parent_child_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
from scipy.stats import norm
import numpy as np

poster_pages = len(splt_image_id_to_root)
doc_pages = len(image_id_to_root)

# Rows are directions, columns are distance bins
# (expected 8 x 6, matching the label lists below).
directions, distances = splt_heatmap.shape
p_values = np.full((directions, distances), np.nan)

# Two-sided z-test, cell by cell, on the difference between the two
# per-page rates (count / number of pages).
for i, j in np.ndindex(directions, distances):
    rate_splt = splt_heatmap[i, j] / poster_pages
    rate_doc = heatmap[i, j] / doc_pages
    # Standard error of the rate difference: sqrt(rate1/n1 + rate2/n2).
    std_err = np.sqrt(rate_splt / poster_pages + rate_doc / doc_pages)

    if std_err != 0:
        z = (rate_splt - rate_doc) / std_err
        p_values[i, j] = 2 * (1 - norm.cdf(abs(z)))
    else:
        # Zero standard error: no z statistic can be formed, so fall back to
        # an exact comparison of the two rates.
        p_values[i, j] = 1.0 if rate_splt == rate_doc else 0.0

# A cell is significant at the 5% level (NaN p-values compare as False).
significant = p_values < 0.05

direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
distance_labels = ["(0,1]", "(1,2]", "(2,4]", "(4,8]", "(8,16]", "(16,∞)"]

# Report every (direction, distance bin) cell WITHOUT a significant difference.
print("\n=== ページあたり出現率に有意差なし (p ≥ 0.05, Poisson z-test) ===")
for i, j in np.ndindex(directions, distances):
    if significant[i, j]:
        continue
    direc = direction_labels[i]
    dist = distance_labels[j]
    print(f"{direc} × {dist}: p = {p_values[i, j]:.4f}")

In [None]:
import numpy as np
from collections import defaultdict

# Fixed direction order; the strings must match what classify_8_directions
# returns, because they are used as lookup keys for the heatmap row.
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
num_dirs = len(direction_labels)
direction_to_idx = dict(zip(direction_labels, range(num_dirs)))

# Distance bin edges: 7 edges -> 6 bins.
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0, *thresholds, np.inf]
num_bins = len(bin_edges) - 1

# Result: counts per (direction, distance bin).
heatmap = np.zeros((num_dirs, num_bins), dtype=int)

for image_id, root in image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]

    # Walk consecutive node pairs in the order produced by dfs_all_nodes.
    for prev_node, next_node in zip(nodes, nodes[1:]):
        x1, y1 = bbox_center(prev_node.bbox)
        x2, y2 = bbox_center(next_node.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centres carry no direction: skip.
        if dx == 0 and dy == 0:
            continue

        # --- Direction class ---
        direction = classify_8_directions(dx, dy)
        dir_idx = direction_to_idx[direction]

        # --- Relative distance ---
        # Use the dominant axis of the displacement and normalise by the
        # larger of the two boxes' extents along it (bbox[2] = width,
        # bbox[3] = height).
        horizontal = abs(dx) >= abs(dy)
        extent = 2 if horizontal else 3
        scale = max(prev_node.bbox[extent], next_node.bbox[extent])
        dist = abs(dx) if horizontal else abs(dy)
        if scale == 0:
            continue
        norm_dist = dist / scale

        # --- Distance bin ---
        # np.digitize with its default right=False returns i such that
        # bin_edges[i-1] <= x < bin_edges[i]; subtracting 1 gives a 0-based bin.
        # NOTE(review): the table labels elsewhere in this file are written as
        # (a, b]; confirm which side of each edge is meant to be inclusive.
        bin_idx = np.digitize(norm_dist, bin_edges) - 1
        bin_idx = min(bin_idx, num_bins - 1)  # clamp into the last bin

        # --- Count ---
        heatmap[dir_idx, bin_idx] += 1

In [None]:
# Row (direction) and column (distance bin) labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# The last label is a raw string: "\i" is not a valid escape sequence, and a
# normal literal containing it raises a SyntaxWarning on Python 3.12+.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Number of entries in image_id_to_root; constant for every table cell, so it
# is computed once instead of inside the formatting loop.
num_images = len(image_id_to_root)

# Build the LaTeX table: header lines first
latex = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\begin{tabular}{l|" + "r" * heatmap.shape[1] + "}",
    "\\toprule",
    "Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\",
    "\\midrule",
]

# One table row per direction; each cell is count / num_images * 1000,
# formatted with thousands separators and two decimals.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v / num_images * 1000:,.2f}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex += [
    "\\bottomrule",
    "\\end{tabular}",
    "\\caption{Reading Order frequency by direction and normalized distance bin.}",
    "\\label{tab:reading_order_direction_distance}",
    "\\end{table}",
]

# Print the finished table
print("\n".join(latex))

In [None]:
# --- bbox centre ---
def bbox_center(bbox):
    """Return the centre point of a box given as (x, y, w, h).

    The result is the tuple (x + w / 2, y + h / 2).
    """
    x0, y0, width, height = bbox
    center_x = x0 + width / 2
    center_y = y0 + height / 2
    return center_x, center_y

# --- Direction classification (8 sectors) ---
def classify_8_directions(dx, dy):
    """Classify the displacement (dx, dy) into one of eight 45-degree sectors.

    The angle is atan2(dy, dx) in degrees, wrapped into [0, 360). Sectors are
    centred on multiples of 45 degrees, and a positive dy maps to "下" (down).
    Returns one of "右", "右下", "下", "左下", "左", "左上", "上", "右上", or
    "不明" when the angle falls outside [0, 360) (e.g. NaN input).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # Anything that is not a proper angle (NaN) cannot be classified.
    if not (0 <= angle < 360):
        return "不明"

    # (exclusive upper bound in degrees, label); the "右" sector wraps around
    # 0 degrees and therefore appears at both ends of the table.
    sector_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
        (360, "右"),
    )
    for upper, name in sector_bounds:
        if angle < upper:
            return name
    return "不明"

# --- Definitions ---
# Direction strings must match the return values of classify_8_directions.
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_index = dict(zip(direction_labels, range(len(direction_labels))))
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0, *thresholds, float('inf')]
bin_count = len(bin_edges) - 1

# --- Count matrix (direction x distance bin) ---
heatmap = np.zeros((8, bin_count), dtype=int)

# --- Count parent -> child relations ---
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)

    # Every (parent, child) edge in which neither end has category "Root".
    parent_child_pairs = (
        (parent, child)
        for parent in nodes
        if parent.category != "Root"
        for child in parent.children
        if child.category != "Root"
    )

    for parent, child in parent_child_pairs:
        x1, y1 = bbox_center(parent.bbox)
        x2, y2 = bbox_center(child.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centres carry no direction: skip.
        if dx == 0 and dy == 0:
            continue

        direction = classify_8_directions(dx, dy)
        if direction == "不明":
            continue
        dir_idx = direction_to_index[direction]

        # Normalised distance along the dominant axis of the displacement,
        # scaled by the larger of the two boxes' extents along that axis
        # (bbox[2] = width, bbox[3] = height).
        dx_abs, dy_abs = abs(dx), abs(dy)
        horizontal = dx_abs >= dy_abs
        extent = 2 if horizontal else 3
        scale = max(parent.bbox[extent], child.bbox[extent])
        dist = dx_abs if horizontal else dy_abs

        if scale == 0:
            continue
        norm_dist = dist / scale

        # Assign to a bin. np.digitize with its default right=False returns i
        # such that bin_edges[i-1] <= x < bin_edges[i]; subtract 1 for a
        # 0-based bin and clamp into the last bin.
        # NOTE(review): the table labels elsewhere in this file are written as
        # (a, b]; confirm which side of each edge is meant to be inclusive.
        bin_idx = np.digitize(norm_dist, bin_edges) - 1
        bin_idx = min(bin_idx, bin_count - 1)
        heatmap[dir_idx, bin_idx] += 1

In [None]:
# Row (direction) and column (distance bin) labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# The last label is a raw string: "\i" is not a valid escape sequence, and a
# normal literal containing it raises a SyntaxWarning on Python 3.12+.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Number of entries in image_id_to_root; constant for every table cell, so it
# is computed once instead of inside the formatting loop.
num_images = len(image_id_to_root)

# Build the LaTeX table: header lines first
latex = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\begin{tabular}{l|" + "r" * heatmap.shape[1] + "}",
    "\\toprule",
    "Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\",
    "\\midrule",
]

# One table row per direction; each cell is count / num_images * 1000,
# formatted with thousands separators and two decimals.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v / num_images * 1000:,.2f}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex += [
    "\\bottomrule",
    "\\end{tabular}",
    "\\caption{Parent-child frequency by direction and normalized distance bin.}",
    "\\label{tab:parent_child_direction_distance}",
    "\\end{table}",
]

# Print the finished table
print("\n".join(latex))