In [None]:
import os
import json
import itertools
import numpy as np
from collections import defaultdict, Counter
from pathlib import Path
import torch
import matplotlib.pyplot as plt
from collections import deque

from detectron2.utils import comm
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import pairwise_iou
from scipy.optimize import linear_sum_assignment
from apted import APTED, Config

In [None]:
# Create the output directory used by the plt.savefig calls in this notebook;
# exist_ok=True makes the cell safe to re-run.
os.makedirs("./figures", exist_ok=True)

In [None]:
# Directory that contains the tree-annotation JSON files.
dataset_dir = "/scipostlayout/poster/png"
# Annotation files to merge; each name is joined onto dataset_dir when it is opened.
json_files = ["train_tree.json", "dev_tree.json", "test_tree.json"]
# Alternative kept for reference: analyse the test split only.
# json_files = ["test_tree.json"]

In [None]:
# Accumulators for the merged dataset
all_images = {}
all_annotations = []
category_id_to_name = {}

# Read every JSON file and merge its contents into the accumulators
for fname in json_files:
    path = os.path.join(dataset_dir, fname)
    with open(path, "r") as f:
        data = json.load(f)
    # Images are keyed by id; a later file overrides an earlier one with the same id
    all_images.update((img["id"], img) for img in data.get("images", []))
    # Annotations are simply concatenated
    all_annotations += data.get("annotations", [])
    # Categories are registered only once, from the first file that fills the mapping
    if not category_id_to_name:
        category_id_to_name.update(
            (cat["id"], cat["name"]) for cat in data.get("categories", [])
        )

# Drop annotations whose category is "Root" or "Unknown"
excluded_names = {"Root", "Unknown"}
excluded_ids = {
    cat_id
    for cat_id, name in category_id_to_name.items()
    if name in excluded_names
}
filtered_annotations = [
    ann for ann in all_annotations
    if ann["category_id"] not in excluded_ids
]

# Keep only the images that still have at least one annotation
used_image_ids = {ann["image_id"] for ann in filtered_annotations}
filtered_images = {img_id: all_images[img_id] for img_id in used_image_ids}

# --- Number of bounding boxes per image ---
image_id_to_count = Counter(ann["image_id"] for ann in filtered_annotations)

bbox_counts_per_image = list(image_id_to_count.values())
total_images = len(filtered_images)
total_annotations = len(filtered_annotations)

print(f"Images: {total_images}")
print(f"Annotations: {total_annotations}")
print(f"Mean BBoxes: {np.mean(bbox_counts_per_image):.2f}")
print(f"Std. BBoxes: {np.std(bbox_counts_per_image):.2f}")

# --- Number of bounding boxes per category ---
category_counts = Counter(ann["category_id"] for ann in filtered_annotations)

print("\nCategory BBoxes:")
for cat_id, count in category_counts.most_common():
    name = category_id_to_name.get(cat_id, f"(id={cat_id})")
    print(f"  {name:20s}: {count}")

# Ids of every retained image
all_image_ids = set(filtered_images.keys())

# category -> {image_id: count}; every category starts with a zero for each image
# so that images without that category still contribute to the mean / std below
cat_img_to_count = defaultdict(lambda: dict.fromkeys(all_image_ids, 0))

# Tally annotations per (category, image)
for ann in filtered_annotations:
    cat_img_to_count[ann["category_id"]][ann["image_id"]] += 1

# Per-category mean / std over all retained images (zeros included)
print("\nMean (Std.) Category BBoxes:")
for cat_id, img_counts in cat_img_to_count.items():
    name = category_id_to_name.get(cat_id, f"(id={cat_id})")
    values = list(img_counts.values())
    mean, std = np.mean(values), np.std(values)
    print(f"  {name:20s}: Mean={mean:.2f}, Std.={std:.2f}")

In [None]:
N = 30  # how many top entries to list

# Counter.most_common(N) returns (image_id, count) pairs in descending count order
top_images = image_id_to_count.most_common(N)

print(f"\nBBox 数が多い上位 {N} 件の画像:")
for i, (image_id, count) in enumerate(top_images, start=1):
    # Fall back to a placeholder when the image record or its file name is missing
    fname = all_images.get(image_id, {}).get("file_name", "(no name)")
    print(f"{i:2d}: image_id={image_id}, file_name={fname}, BBox 数={count}")

In [None]:
class TreeNode:
    """A node of a layout tree.

    Stores the values passed to the constructor unchanged (id, label, bbox,
    category, priority) and starts with an empty list of child TreeNodes.
    """

    def __init__(self, id, label, bbox, category, priority):
        self.id, self.label = id, label
        self.bbox, self.category = bbox, category
        self.priority = priority
        # Child nodes (list of TreeNode); each instance gets its own fresh list
        self.children = list()

In [None]:
def build_tree_with_root(annotations):
    """
    Build one tree from a list of annotations and return its Root node.

    Every annotation must provide "id", "bbox", "category_name" and "priority".
    "parents" is optional; when it is non-empty only its first entry is used as
    the parent (single-parent constraint). The children of every node are
    ordered by ascending priority.

    Args:
        annotations: list of annotation dicts that belong to one tree.

    Returns:
        The TreeNode built from the first annotation whose "category_name"
        is "Root".

    Raises:
        ValueError: if no annotation has category "Root", or if an annotation
            names a parent id that is not present in `annotations` (for
            example because the parent was filtered out before the call).
    """

    # id -> TreeNode for every annotation (the annotation id doubles as the label)
    id_to_node = {
        ann["id"]: TreeNode(
            id=ann["id"],
            label=ann["id"],
            bbox=ann["bbox"],
            category=ann["category_name"],
            priority=ann["priority"]
        )
        for ann in annotations
    }

    # Link each node to its parent; annotations without parents are left unlinked
    for ann in annotations:
        parent_ids = ann.get("parents", [])
        if not parent_ids:
            continue
        parent_id = parent_ids[0]  # only the first parent is used
        parent_node = id_to_node.get(parent_id)
        if parent_node is None:
            # Raise ValueError rather than a bare KeyError so callers that
            # already handle tree-construction failures can report this case.
            raise ValueError(
                f"annotation id={ann['id']} references unknown parent id={parent_id}"
            )
        parent_node.children.append(id_to_node[ann["id"]])

    # Order the children of every node by ascending priority
    for node in id_to_node.values():
        node.children.sort(key=lambda child: child.priority)

    # Return the Root node (exactly one is assumed; the first match wins)
    for ann in annotations:
        if ann["category_name"] == "Root":
            return id_to_node[ann["id"]]

    raise ValueError("Root ノードが見つかりませんでした。")

In [None]:
def build_trees_per_image(annotations):
    """
    Group a flat annotation list by "image_id" and build one tree per image.

    Images whose tree construction raises ValueError are reported with a
    printed warning and left out of the result.

    Returns: dict[image_id] = root TreeNode
    """

    # Bucket the annotations by image, keeping first-seen image order
    image_to_anns = {}
    for ann in annotations:
        image_to_anns.setdefault(ann["image_id"], []).append(ann)

    image_to_root = {}
    for image_id, anns in image_to_anns.items():
        try:
            image_to_root[image_id] = build_tree_with_root(anns)
        except ValueError as e:
            print(f"[警告] image_id={image_id} で木構築失敗: {e}")

    return image_to_root

In [None]:
# Example: build one tree per image.
# NOTE: this rebinds filtered_annotations. Unlike the statistics cell, the list
# built here keeps "Root" annotations and only drops "Unknown" ones.
filtered_annotations = [
    ann
    for ann in all_annotations
    if ann.get("category_name") != "Unknown"
]
image_id_to_root = build_trees_per_image(filtered_annotations)

print(f"構築された木の数: {len(image_id_to_root)}")

# Inspect the tree of an arbitrary image (the first one in the dict)
sample_id, tree = next(iter(image_id_to_root.items()))
print(f"Root ID: {tree.id}, label: {tree.label}")

In [None]:
def print_tree(node, indent=0):
    """
    Print a TreeNode and all of its descendants, one node per line.

    Each line is indented by two spaces per level below `indent`; children are
    printed in the order they are stored, directly after their parent.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that they are popped (and printed) in their stored order.
    pending = [(node, indent)]
    while pending:
        node, level = pending.pop()
        prefix = "  " * level
        print(f"{prefix}- ID: {node.id}, Priority: {node.priority}, Category: {node.category}")
        pending.extend((child, level + 1) for child in reversed(node.children))


In [None]:
# Take an arbitrary image (the first dict entry) and print its tree
sample_id, tree = next(iter(image_id_to_root.items()))

print(f"[Tree for image_id={sample_id}]")
print_tree(tree)

In [None]:
from collections import deque, defaultdict

def compute_tree_depth_and_width(root):
    """
    Return (max_depth, max_width) of the tree rooted at `root`.

    The root is at depth 1. The width is the largest number of nodes found on
    any single level; nodes on the same level are counted together even when
    they have different parents.
    """
    max_depth = 0
    max_width = 0

    # Walk the tree one whole level at a time
    level = [root]
    while level:
        max_depth += 1
        max_width = max(max_width, len(level))
        level = [child for node in level for child in node.children]

    return max_depth, max_width

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# (depth, width) of every tree, keyed by image id
tree_shapes = {
    image_id: compute_tree_depth_and_width(root)
    for image_id, root in image_id_to_root.items()
}

# Index-aligned per-image lists derived from tree_shapes
image_ids = list(tree_shapes)
depths = [shape[0] for shape in tree_shapes.values()]
widths = [shape[1] for shape in tree_shapes.values()]

# (image_id, width) pairs, used later to rank images by width
image_width_list = list(zip(image_ids, widths))

# Summary statistics
print("Tree statistics:")
print(f"mean depth: {np.mean(depths):.2f}")
print(f"max depth : {np.max(depths)}")
print(f"std. depth: {np.std(depths):.2f}")
print(f"mean width: {np.mean(widths):.2f}")
print(f"max width : {np.max(widths)}")
print(f"std. width: {np.std(widths):.2f}")

children_counts = []

def collect_children_counts(node):
    """Append the child count of `node` and of every descendant (pre-order) to children_counts."""
    stack = [node]
    while stack:
        current = stack.pop()
        children_counts.append(len(current.children))
        # Reverse so that children are visited in their stored order
        stack.extend(reversed(current.children))

# Gather the counts over every tree
for root in image_id_to_root.values():
    collect_children_counts(root)

# Mean and standard deviation of the number of children per node
mean_children = np.mean(children_counts)
var_children = np.std(children_counts)  # holds the standard deviation despite the name

print(f"mean children: {mean_children:.2f}")
print(f"std. children: {var_children:.2f}")

In [None]:
# Bar chart of tree depths.
# Count how many trees have each depth.
depth_counter = Counter(depths)
x = sorted(depth_counter.keys())
y = [depth_counter[d] for d in x]

plt.figure(figsize=(6, 1.5))
plt.bar(x, y, edgecolor="black")
# plt.title("Tree Depth Distribution")
plt.xlabel("Depth", fontsize=20)
plt.ylabel("Frequency")
plt.xticks(x, fontsize=20)  # one tick per observed depth value
plt.yticks(fontsize=20)
plt.grid(axis='y')
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/tree_depth_dist.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/tree_depth_dist.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Bar chart of tree widths.
# Count how many trees have each width.
width_counter = Counter(widths)
x_vals = sorted(width_counter.keys())
y_vals = [width_counter[w] for w in x_vals]

plt.figure(figsize=(6, 1.5))
plt.bar(x_vals, y_vals, edgecolor="black")
# plt.title("Tree Width Distribution")
plt.xlabel("Width", fontsize=20)
plt.ylabel("Frequency")

# X ticks start at min_x rounded down to a multiple of 5 and advance in steps
# of 10 (the range step is 10, not 5), stopping at or before the first
# multiple of 5 above max_x.
min_x = min(x_vals)
max_x = max(x_vals)
xticks = list(range((min_x // 5) * 5, ((max_x // 5) + 1) * 5 + 1, 10))
plt.xticks(xticks, fontsize=20)
plt.yticks(fontsize=20)

plt.grid(axis='y')
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/tree_width_dist.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/tree_width_dist.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Count how many nodes have each number of children.
branch_counter = Counter(children_counts)
x_vals = sorted(branch_counter.keys())
# Bar heights are log2 of the counts. log2(1) == 0, so a child count that
# occurs exactly once is drawn as a zero-height bar.
y_vals = np.log2([branch_counter[x] for x in x_vals])

# Draw the bars
plt.figure(figsize=(6, 1.5))
plt.bar(x_vals, y_vals, edgecolor="black")
# plt.title("Distribution of Number of Children per Node")
plt.xlabel("Number of children", fontsize=20)
plt.ylabel("Log-scale\nFrequency")

# X ticks start at min_x rounded down to a multiple of 5 and advance in steps
# of 10 (the range step is 10, not 5).
min_x = min(x_vals)
max_x = max(x_vals)
xticks = list(range((min_x // 5) * 5, ((max_x // 5 + 1) * 5) + 1, 10))
plt.xticks(xticks, fontsize=20)
plt.yticks(fontsize=20)

plt.grid(axis='y')
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/num_children_dist.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/num_children_dist.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# --- Tree depth: plotted as-is ---
depth_counter = Counter(depths)
depth_x = sorted(depth_counter)
depth_y = [depth_counter[d] for d in depth_x]

# --- Tree width: every value of 40 or more is pooled into the single bin 40 ---
width_counter = Counter(min(w, 40) for w in widths)
width_x = sorted(width_counter)
width_y = [width_counter[x] for x in width_x]

# --- Children per node: same pooling at 40, bar heights are log2 of the counts ---
branch_counter = Counter(min(c, 40) for c in children_counts)
branch_x = sorted(branch_counter)
branch_y = [np.log2(branch_counter[x]) for x in branch_x]

# --- Figure with three side-by-side panels ---
fig, axes = plt.subplots(1, 3, figsize=(12, 1.8))

xlabel_fontsize = 20
ylabel_fontsize = 12
tick_fontsize = 20

# (x values, bar heights, title, title font size, x label, y label) per panel
panels = [
    (depth_x, depth_y, "(a) Tree Depth", 20, "Depth", "Frequency"),
    (width_x, width_y, "(b) Tree Width", 20, "Width", "Frequency"),
    (branch_x, branch_y, "(c) Number of Children per Node", 18,
     "Number of children", "Log-scale\nFrequency"),
]
for ax, (xs, ys, title, title_fontsize, xlabel, ylabel) in zip(axes, panels):
    ax.bar(xs, ys, edgecolor="black")
    ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlabel(xlabel, fontsize=xlabel_fontsize)
    ax.set_ylabel(ylabel, fontsize=ylabel_fontsize)
    ax.grid(axis='y')

# (a) one tick per observed depth
axes[0].set_xticks(depth_x)

# (b) ticks every 10, with the pooled bin labelled "40+"
xticks_w = [0, 10, 20, 30, 40]
xtick_labels_w = ["0", "10", "20", "30", "40+"]
axes[1].set_xticks(xticks_w)
axes[1].set_xticklabels(xtick_labels_w)

# (c) reposition the two-line y label, then the same "40+" tick scheme
axes[2].yaxis.set_label_coords(-0.15, 0.2)
xticks_b = [0, 10, 20, 30, 40]
xtick_labels_b = ["0", "10", "20", "30", "40+"]
axes[2].set_xticks(xticks_b)
axes[2].set_xticklabels(xtick_labels_b)

# --- Tick label font size for all panels ---
for ax in axes:
    ax.tick_params(labelsize=tick_fontsize)

# --- Save (PNG and EPS) and show ---
plt.tight_layout()
plt.savefig("./figures/tree_stats_summary.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/tree_stats_summary.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
N = 30  # how many top entries to list

# Rank the (image_id, width) pairs by width, widest first, and keep the first N
top_images = sorted(
    image_width_list,
    key=lambda pair: pair[1],
    reverse=True,
)[:N]

print(f"\n幅が大きい上位 {N} 件の image_id:")
for i, (img_id, width) in enumerate(top_images, start=1):
    print(f"{i:2d}: image_id={img_id}, width={width}")

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Fixed category order for the rows and columns of the table
label_order = ["Root", "Title", "Author Info", "Section", "Text", "List", "Figure", "Table", "Caption"]

# Counts of (parent category, child category) pairs
pair_counter = Counter()

def collect_parent_child_pairs(node):
    """Recursively add one (parent category, child category) count to pair_counter for every edge below `node`."""
    for child in node.children:
        pair_counter[(node.category, child.category)] += 1
        collect_parent_child_pairs(child)

# Accumulate over all trees
for root in image_id_to_root.values():
    collect_parent_child_pairs(root)

# Cross table initialised with zeros
df = pd.DataFrame(0, index=label_order, columns=label_order)

# Fill in the counts; pairs with a category outside label_order are ignored
for (parent_label, child_label), count in pair_counter.items():
    if parent_label in df.index and child_label in df.columns:
        df.loc[parent_label, child_label] = count

# --- Drop all-zero rows and columns ---
# Rows: labels that occur as a parent
used_rows = df.sum(axis=1) > 0
# Columns: labels that occur as a child
used_cols = df.sum(axis=0) > 0

# Keep only the non-zero rows and columns
df_filtered = df.loc[used_rows, used_cols]
# Break multi-word column labels onto separate lines
df_filtered.columns = [label.replace(" ", "\n") for label in df_filtered.columns]

# --- Draw the heatmap ---
plt.figure(figsize=(10, 3))
# sns.heatmap(df_filtered, annot=True, fmt="d", cmap="Blues", linewidths=0.)
# NOTE(review): annot is not enabled in this call, so annot_kws presumably has no visible effect — confirm.
ax = sns.heatmap(df_filtered, cmap="Blues", linewidths=0., annot_kws={"fontsize": 18}, cbar_kws={"shrink": 1.0})
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)
# plt.title("Parent-Child Frequency Heatmap")
plt.xlabel("Child Category", fontsize=18)
plt.ylabel("Parent Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/parent_child_heatmap.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/parent_child_heatmap.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
from tabulate import tabulate

# --- Keep only rows / columns whose total is non-zero ---
used_rows = df.sum(axis=1) != 0
used_cols = df.sum(axis=0) != 0
df_filtered = df.loc[used_rows, used_cols]

# --- Display names: break multi-word labels onto separate lines ---
df_filtered.columns = [col.replace(" ", "\n") for col in df_filtered.columns]
df_filtered.index = [idx.replace(" ", "\n") for idx in df_filtered.index]

# --- Format every count with thousands separators ---
df_formatted = df_filtered.applymap(lambda x: f"{int(x):,}")

# --- Pieces of the LaTeX tabular ---
header = ["Parent \\textbackslash{} Child"] + list(df_formatted.columns)
rows = [[row] + df_formatted.loc[row].tolist() for row in df_formatted.index]

# The table is assembled as a list of lines and joined once at the end
header_cells = ["\\multicolumn{1}{l}{" + col + "}" for col in header]
body_lines = [row[0] + " & " + " & ".join(row[1:]) + " \\\\" for row in rows]
latex_lines = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\resizebox{.95\\columnwidth}{!}{",
    "\\begin{tabular}{l|" + "r" * len(df_formatted.columns) + "}",
    "\\toprule",
    " & ".join(header_cells) + " \\\\",
    "\\midrule",
    *body_lines,
    "\\bottomrule",
    "\\end{tabular}",
    "}",
    "\\caption{Parent-child frequency by category.}",
    "\\label{tab:parent_child_category_frequency}",
    "\\end{table}",
]
latex_table = "\n".join(latex_lines)

print(latex_table)


In [None]:
from tabulate import tabulate

# --- Keep only rows / columns whose total is non-zero ---
used_rows = df.sum(axis=1) != 0
used_cols = df.sum(axis=0) != 0
df_filtered = df.loc[used_rows, used_cols]

# --- Display names: break multi-word labels onto separate lines ---
df_filtered.columns = [col.replace(" ", "\n") for col in df_filtered.columns]
df_filtered.index = [idx.replace(" ", "\n") for idx in df_filtered.index]

# --- Express every count per 1,000 trees, with two decimals and thousands separators ---
df_formatted = df_filtered.applymap(lambda x: f"{int(x)/len(image_id_to_root)*1000:,.2f}")

# --- Pieces of the LaTeX tabular ---
header = ["Parent \\textbackslash{} Child"] + list(df_formatted.columns)
rows = [[row] + df_formatted.loc[row].tolist() for row in df_formatted.index]

# Preamble, header row, body rows and closing lines are built separately
# and joined with newlines once at the end
preamble = [
    "\\begin{table}[t!]",
    "\\centering",
    "\\resizebox{.95\\columnwidth}{!}{",
    "\\begin{tabular}{l|" + "r" * len(df_formatted.columns) + "}",
    "\\toprule",
]
header_line = " & ".join("\\multicolumn{1}{l}{" + col + "}" for col in header) + " \\\\"
body_lines = [row[0] + " & " + " & ".join(row[1:]) + " \\\\" for row in rows]
closing = [
    "\\bottomrule",
    "\\end{tabular}",
    "}",
    "\\caption{Parent-child frequency by category.}",
    "\\label{tab:parent_child_category_frequency}",
    "\\end{table}",
]
latex_table = "\n".join(preamble + [header_line, "\\midrule"] + body_lines + closing)

print(latex_table)


In [None]:
import numpy as np

# Compress the raw counts with log2(1 + x); the +1 keeps zero cells finite.
df_log = np.log2(1+df_filtered)

plt.figure(figsize=(10, 3))
# Cell colours follow the log2-scaled values. No annotations are drawn,
# because annot is not passed to sns.heatmap.
ax = sns.heatmap(
    df_log,
    cmap="Blues",
    linewidths=0.,
    annot_kws={"fontsize": 18},
    cbar_kws={"shrink": 1.0}
)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)

plt.xlabel("Child Category", fontsize=18)
plt.ylabel("Parent Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
# Saved under a "_raw" suffix. The per-1,000 heatmap cell writes
# parent_child_heatmap_log2.{png,eps}; sharing those paths would let it
# silently overwrite this raw-count figure.
plt.savefig("./figures/parent_child_heatmap_log2_raw.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/parent_child_heatmap_log2_raw.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import numpy as np

# Scale the counts to "per 1,000 trees" (divide by len(image_id_to_root), times 1000),
# then compress with log2(x + 1); the +1 keeps zero cells finite.
df_log = np.log2(df_filtered/len(image_id_to_root)*1000+1)

plt.figure(figsize=(10, 3))
# Cell colours follow the log2-scaled values; annot is not passed, so no cell annotations are requested.
ax = sns.heatmap(
    df_log,
    cmap="Blues",
    linewidths=0.,
    annot_kws={"fontsize": 18},
    cbar_kws={"shrink": 1.0}
)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)

plt.xlabel("Child Category", fontsize=18)
plt.ylabel("Parent Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/parent_child_heatmap_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/parent_child_heatmap_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Counts of (category, next category) pairs for nodes that are adjacent in reading order.
reading_order_counter = Counter()

def dfs_all_nodes(node):
    """
    Return `node` and all of its descendants as a flat list in pre-order,
    visiting the children of every node in ascending priority.
    """
    ordered = []
    stack = [node]
    while stack:
        current = stack.pop()
        ordered.append(current)
        # sorted() is stable, so children with equal priority keep their stored
        # order; pushing them reversed makes the lowest priority pop first.
        by_priority = sorted(current.children, key=lambda c: c.priority)
        stack.extend(reversed(by_priority))
    return ordered

# Linearise every tree and count the category pair of each two consecutive nodes
for root in image_id_to_root.values():
    ordered_nodes = dfs_all_nodes(root)
    for node, successor in zip(ordered_nodes, ordered_nodes[1:]):
        reading_order_counter[(node.category, successor.category)] += 1


In [None]:
# ラベル順（固定）
label_order = ["Root", "Title", "Author Info", "Section", "Text", "List", "Figure", "Table", "Caption"]

# クロス表初期化
df = pd.DataFrame(0, index=label_order, columns=label_order)

# ペア頻度を格納
for (label1, label2), count in reading_order_counter.items():
    if label1 in df.index and label2 in df.columns:
        df.loc[label1, label2] = count

# ゼロ行・列を除去
used_rows = df.sum(axis=1) > 0
used_cols = df.sum(axis=0) > 0
df_filtered = df.loc[used_rows, used_cols]
df_filtered.columns = [label.replace(" ", "\n") for label in df_filtered.columns]

# ヒートマップ表示
plt.figure(figsize=(10, 4))
# sns.heatmap(df_filtered, annot=True, fmt="d", cmap="Blues", linewidths=0.)
ax = sns.heatmap(df_filtered, cmap="Blues", linewidths=0., annot_kws={"fontsize": 18}, cbar_kws={"shrink": 1.0})
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)
# plt.title("Reading Order Transition Heatmap")
plt.xlabel("Subsequent Category", fontsize=18)
plt.ylabel("Preceding Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
plt.savefig("./figures/reading_order_heatmap.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/reading_order_heatmap.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 固定ラベル順
label_order = ["Root", "Title", "Author Info", "Section", "Text", "List", "Figure", "Table", "Caption"]

# ゼロ行・列の除去（順序は label_order に従う）
used_rows = df.sum(axis=1) > 0
used_cols = df.sum(axis=0) > 0
valid_rows = [label for label in label_order if used_rows.get(label, False)]
valid_cols = [label for label in label_order if used_cols.get(label, False)]

# 絞り込み＋改行対応
df_filtered = df.loc[valid_rows, valid_cols]
df_filtered.index = [label.replace(" ", "\n") for label in df_filtered.index]
df_filtered.columns = [label.replace(" ", "\n") for label in df_filtered.columns]

# 数値をカンマ付きに整形
df_formatted = df_filtered.applymap(lambda x: f"{int(x):,}")

# LaTeX テーブル組み立て
latex = "\\begin{table}[t!]\n"
latex += "\\centering\n"
latex += "\\resizebox{.95\\columnwidth}{!}{\n"
latex += "\\begin{tabular}{l|" + "r" * len(df_formatted.columns) + "}\n"
latex += "\\toprule\n"
headers = ["Preceding \\textbackslash{} Subsequent"] + list(df_formatted.columns)
latex += " & ".join([f"\\multicolumn{{1}}{{l}}{{{col}}}" for col in headers]) + " \\\\\n"
latex += "\\midrule\n"
for row_label in df_formatted.index:
    row = df_formatted.loc[row_label]
    latex += f"{row_label} & " + " & ".join(row.values) + " \\\\\n"
latex += "\\bottomrule\n"
latex += "\\end{tabular}\n"
latex += "}\n"
latex += "\\caption{Category transition frequencies in reading order.}\n"
latex += "\\label{tab:reading_order_transition}\n"
latex += "\\end{table}"

print(latex)


In [None]:
# 固定ラベル順
label_order = ["Root", "Title", "Author Info", "Section", "Text", "List", "Figure", "Table", "Caption"]

# ゼロ行・列の除去（順序は label_order に従う）
used_rows = df.sum(axis=1) > 0
used_cols = df.sum(axis=0) > 0
valid_rows = [label for label in label_order if used_rows.get(label, False)]
valid_cols = [label for label in label_order if used_cols.get(label, False)]

# 絞り込み＋改行対応
df_filtered = df.loc[valid_rows, valid_cols]
df_filtered.index = [label.replace(" ", "\n") for label in df_filtered.index]
df_filtered.columns = [label.replace(" ", "\n") for label in df_filtered.columns]

# 数値をカンマ付きに整形
df_formatted = df_filtered.applymap(lambda x: f"{int(x)/len(image_id_to_root)*1000:,.2f}")

# LaTeX テーブル組み立て
latex = "\\begin{table}[t!]\n"
latex += "\\centering\n"
latex += "\\resizebox{.95\\columnwidth}{!}{\n"
latex += "\\begin{tabular}{l|" + "r" * len(df_formatted.columns) + "}\n"
latex += "\\toprule\n"
headers = ["Preceding \\textbackslash{} Subsequent"] + list(df_formatted.columns)
latex += " & ".join([f"\\multicolumn{{1}}{{l}}{{{col}}}" for col in headers]) + " \\\\\n"
latex += "\\midrule\n"
for row_label in df_formatted.index:
    row = df_formatted.loc[row_label]
    latex += f"{row_label} & " + " & ".join(row.values) + " \\\\\n"
latex += "\\bottomrule\n"
latex += "\\end{tabular}\n"
latex += "}\n"
latex += "\\caption{Category transition frequencies in reading order.}\n"
latex += "\\label{tab:reading_order_transition}\n"
latex += "\\end{table}"

print(latex)


In [None]:
# log2(1 + x) に変換（0対応）
# Compress the raw transition counts with log2(x + 1); the +1 keeps zero cells finite.
df_log2 = np.log2(df_filtered + 1)

plt.figure(figsize=(10, 4))
ax = sns.heatmap(
    df_log2,
    cmap="Blues",
    linewidths=0.,
    annot_kws={"fontsize": 18},
    cbar_kws={"shrink": 1.0}
)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)

plt.xlabel("Subsequent Category", fontsize=18)
plt.ylabel("Preceding Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
# Saved under a "_raw" suffix. The per-1,000 heatmap cell writes
# reading_order_heatmap_log2.{png,eps}; sharing those paths would let it
# silently overwrite this raw-count figure.
plt.savefig("./figures/reading_order_heatmap_log2_raw.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/reading_order_heatmap_log2_raw.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# log2(1 + x) に変換（0対応）
# Scale the counts to "per 1,000 trees" (divide by len(image_id_to_root), times 1000),
# then compress with log2(x + 1); the +1 keeps zero cells finite.
df_log2 = np.log2(df_filtered/len(image_id_to_root)*1000 + 1)

plt.figure(figsize=(10, 4))
ax = sns.heatmap(
    df_log2,
    cmap="Blues",
    linewidths=0.,
    annot_kws={"fontsize": 18},
    cbar_kws={"shrink": 1.0}
)
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)

plt.xlabel("Subsequent Category", fontsize=18)
plt.ylabel("Preceding Category", fontsize=18)
plt.xticks(rotation=0, fontsize=16)
plt.yticks(rotation=0, fontsize=16)
plt.tight_layout()
# Save the figure as both PNG and EPS
plt.savefig("./figures/reading_order_heatmap_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/reading_order_heatmap_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# --- bbox centre ---
def bbox_center(bbox):
    """Return the centre point (cx, cy) of a box given as (x, y, w, h)."""
    x, y, w, h = bbox
    return x + w / 2, y + h / 2


def classify_quadrant(dx, dy):
    """
    Classify a displacement (dx, dy) into one of the four diagonal directions.

    dx < 0 is "左" (left) and dx >= 0 is "右" (right); dy < 0 is "上" (up) and
    dy >= 0 is "下" (down). Zero components therefore count as right / down,
    which keeps the tie-break of the former `dx >= 0 and dy >= 0` branch and
    also covers straight-up (dx == 0, dy < 0) and straight-left (dx < 0,
    dy == 0) moves, which previously matched no branch and raised.

    Raises:
        ValueError: if dx or dy is NaN, since no direction can be assigned.
    """
    if np.isnan(dx) or np.isnan(dy):
        raise ValueError(f"移動方向を分類できません: dx={dx}, dy={dy}")
    horizontal = "右" if dx >= 0 else "左"
    vertical = "下" if dy >= 0 else "上"
    return horizontal + vertical


# --- Direction labels: up-left, up-right, down-left, down-right ---
direction_types = ["左上", "右上", "左下", "右下"]

# --- Tallies: overall and per image ---
direction_counter = Counter()
image_direction_stats = defaultdict(Counter)

for image_id, root in image_id_to_root.items():
    ordered_nodes = dfs_all_nodes(root)
    # The Root node is excluded from the reading-order sequence
    filtered_nodes = [n for n in ordered_nodes if n.category != "Root"]

    # Classify the move between the centres of each two consecutive nodes
    for n1, n2 in zip(filtered_nodes, filtered_nodes[1:]):
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx = x2 - x1
        dy = y2 - y1

        direction = classify_quadrant(dx, dy)

        direction_counter[direction] += 1
        image_direction_stats[image_id][direction] += 1

# --- One row per image with the count of each direction ---
records = []
for image_id in sorted(image_direction_stats.keys()):
    row = {"image_id": image_id}
    counter = image_direction_stats[image_id]
    for d in direction_types:
        row[d] = counter[d]  # a Counter returns 0 for directions that never occurred
    records.append(row)

# Passing the columns explicitly keeps set_index working when there are no records
df_direction = pd.DataFrame(records, columns=["image_id", *direction_types]).set_index("image_id")

# --- Report ---
total_all = sum(direction_counter.values())
# Only images with at least one counted transition have a row here
num_images = len(df_direction)

print("=== 読み順における視線移動方向の統計（Root 除外） ===")
for direction in direction_types:
    total_count = direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    counts = df_direction[direction].values
    if num_images > 0:
        mean_count = counts.sum() / num_images  # total divided by the number of images
        std_count = counts.std(ddof=0)
    else:
        # No image produced a transition: report zeros instead of dividing by zero
        mean_count = std_count = 0.0

    print(f"{direction:4s}: "
          f"Total={total_count:6d} 回 ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")

In [None]:
import numpy as np
from math import atan2, degrees
from collections import defaultdict, Counter

# --- bbox centre ---
def bbox_center(bbox):
    """Return the centre point (cx, cy) of a box given as (x, y, w, h)."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- Direction classification: four 90-degree sectors centred on right / down / left / up ---
def classify_4quadrants(dx, dy):
    """
    Map a displacement (dx, dy) to "右" (right), "下" (down), "左" (left) or "上" (up).

    The angle is atan2(dy, dx) in degrees, normalised to [0, 360); dy is used
    without sign inversion. Sectors are [315, 45) right, [45, 135) down,
    [135, 225) left and [225, 315) up. "不明" is returned when no sector
    matches (e.g. a NaN angle).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # The "right" sector wraps around 0 degrees
    if angle >= 315 or angle < 45:
        return "右"
    # From here on the sectors are tested in ascending order of their upper bound
    if angle < 135:
        return "下"
    if angle < 225:
        return "左"
    if angle < 315:
        return "上"
    return "不明"

# --- Tallies: overall and per image ---
direction_counter = Counter()
image_direction_stats = defaultdict(Counter)

for image_id, root in image_id_to_root.items():
    ordered_nodes = dfs_all_nodes(root)
    # The Root node is excluded from the reading-order sequence
    filtered_nodes = [n for n in ordered_nodes if n.category != "Root"]

    for n1, n2 in zip(filtered_nodes, filtered_nodes[1:]):
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Only moves with a non-zero displacement are classified
        if dx != 0 or dy != 0:
            direction = classify_4quadrants(dx, dy)
            direction_counter[direction] += 1
            image_direction_stats[image_id][direction] += 1

# --- One row per image with the count of each direction (up, right, down, left) ---
direction_types = ["上", "右", "下", "左"]
records = [
    {
        "image_id": image_id,
        **{d: image_direction_stats[image_id][d] for d in direction_types},
    }
    for image_id in sorted(image_direction_stats.keys())
]

df_direction = pd.DataFrame(records).fillna(0).set_index("image_id")

# --- Report ---
total_all = sum(direction_counter.values())
num_images = len(df_direction)

print("=== 視線移動方向の統計（90度区切り・上下左右） ===")
for direction in direction_types:
    total_count = direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    counts = df_direction[direction].values
    mean_count = counts.sum() / num_images
    std_count = counts.std(ddof=0)

    print(f"{direction:2s}: Total={total_count:6d} ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
# For each number of "up" (上) transitions, count the images that have exactly that many
count_series = (
    df_direction["上"]
    .astype(int)
    .value_counts()
    .sort_index()
)

# Turn the series into a two-column frame
df_up_freq = count_series.rename("Image Count").reset_index()
df_up_freq.columns = ["Top Count", "Image Count"]

# Add the share of images as a fraction and as a rounded percentage
total_images = df_up_freq["Image Count"].sum()
df_up_freq["Ratio"] = df_up_freq["Image Count"].div(total_images)
df_up_freq["Ratio (%)"] = df_up_freq["Ratio"].mul(100).round(2)

# Print the table as plain text
print("=== Histogram Table: Number of 'Top' Transitions per Image ===")
print(df_up_freq.to_string(index=False))

In [None]:
import numpy as np
from math import atan2, degrees
from collections import defaultdict, Counter

# --- bbox centre ---
def bbox_center(bbox):
    """Return the centre point (cx, cy) of a box given as (x, y, w, h)."""
    left, top, width, height = bbox
    return left + width / 2, top + height / 2

# --- Direction classification: eight 45-degree sectors ---
def classify_8_directions(dx, dy):
    """
    Map a displacement (dx, dy) to one of eight direction labels.

    The angle is atan2(dy, dx) in degrees, normalised to [0, 360). Each sector
    is 45 degrees wide and centred on a multiple of 45: 右 (0), 右下 (45),
    下 (90), 左下 (135), 左 (180), 左上 (225), 上 (270), 右上 (315).
    "不明" is returned when no sector matches (e.g. a NaN angle).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # The "right" sector wraps around 0 degrees and is checked on its own
    if (337.5 <= angle < 360) or (0 <= angle < 22.5):
        return "右"

    # Remaining sectors, each identified by its exclusive upper bound
    upper_bounds = (
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper, label in upper_bounds:
        if angle < upper:
            return label
    return "不明"

# --- Tallies: overall and per image ---
direction_counter = Counter()
image_direction_stats = defaultdict(Counter)

for image_id, root in image_id_to_root.items():
    # Reading-order sequence without the Root node
    filtered_nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]

    for n1, n2 in zip(filtered_nodes, filtered_nodes[1:]):
        (x1, y1), (x2, y2) = bbox_center(n1.bbox), bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Only moves with a non-zero displacement are classified
        if dx != 0 or dy != 0:
            direction = classify_8_directions(dx, dy)
            direction_counter[direction] += 1
            image_direction_stats[image_id][direction] += 1

# --- One row per image with the count of each of the eight directions ---
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
records = []
for image_id in sorted(image_direction_stats.keys()):
    per_image = image_direction_stats[image_id]
    records.append({"image_id": image_id, **{d: per_image[d] for d in direction_types}})

df_direction = pd.DataFrame(records).fillna(0).set_index("image_id")

# --- Report ---
total_all = sum(direction_counter.values())
num_images = len(df_direction)

print("=== 視線移動方向の統計（45度刻み・8方向） ===")
for direction in direction_types:
    total_count = direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    counts = df_direction[direction].values
    mean_count = counts.sum() / num_images
    std_count = counts.std(ddof=0)

    print(f"{direction:4s}: Total={total_count:6d} ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# --- Direction labels and corresponding angles (in degrees) ---
# The English labels are listed in the same order as the Japanese counter
# keys that are looked up below.
direction_labels = ["Right", "Bottom-Right", "Bottom", "Bottom-Left",
                    "Left", "Top-Left", "Top", "Top-Right"]
direction_angles = [0, 45, 90, 135, 180, 225, 270, 315]

# --- Convert degrees to radians for matplotlib ---
theta = np.deg2rad(direction_angles)

# --- Frequencies for each direction ---
# Dataset-wide counts read from direction_counter by Japanese direction key.
radii = [direction_counter[d_jp] for d_jp in ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]]

# --- Polar bar chart ---
# One 45-degree-wide bar per direction, centered on that direction's angle.
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
bars = ax.bar(theta, radii, width=np.deg2rad(45), align='center', edgecolor='black')

# --- Polar plot settings ---
ax.set_theta_zero_location("E")  # 0° at the right
ax.set_theta_direction(-1)       # Clockwise

# Put one angular tick on each bar and label it with the English name.
ax.set_xticks(theta)
ax.set_xticklabels(direction_labels, fontsize=12)

ax.set_yticklabels([])  # Hide radial labels (optional)
ax.set_title("Gaze Transition Frequency by Direction (8 Sectors)", fontsize=18)

plt.tight_layout()
plt.show()


In [None]:
def bbox_edges(bbox):
    """Return the edge and center coordinates of an ``(x, y, w, h)`` box.

    Args:
        bbox: Four-element sequence ``(x, y, w, h)``.

    Returns:
        dict with keys ``left``, ``right``, ``top``, ``bottom``,
        ``center_x`` and ``center_y``.
    """
    origin_x, origin_y, width, height = bbox
    far_x = origin_x + width
    far_y = origin_y + height
    mid_x = origin_x + width / 2
    mid_y = origin_y + height / 2
    return dict(
        left=origin_x,
        right=far_x,
        top=origin_y,
        bottom=far_y,
        center_x=mid_x,
        center_y=mid_y,
    )

# Aggregation
# For every window of three consecutive nodes (n1 -> n2 -> n3) in the
# dfs_all_nodes() ordering, record one vertical ("上下:") key and one
# horizontal ("左右:") key describing whether the movement reverses direction.
backward_counter = Counter()
total_triplets = 0
image_backward_stats = defaultdict(Counter)
image_total_triplets = defaultdict(int)

for image_id, root in image_id_to_root.items():
    ordered_nodes = dfs_all_nodes(root)
    filtered = [n for n in ordered_nodes if n.category != "Root"]
    # A triplet needs at least three non-Root nodes.
    if len(filtered) < 3:
        continue

    for i in range(len(filtered) - 2):
        n1, n2, n3 = filtered[i], filtered[i+1], filtered[i+2]
        b1, b2, b3 = bbox_edges(n1.bbox), bbox_edges(n2.bbox), bbox_edges(n3.bbox)

        # Center-to-center displacement of the first and second step.
        dx1 = b2["center_x"] - b1["center_x"]
        dx2 = b3["center_x"] - b2["center_x"]
        dy1 = b2["center_y"] - b1["center_y"]
        dy2 = b3["center_y"] - b2["center_y"]

        total_triplets += 1
        image_total_triplets[image_id] += 1

        # --- Vertical direction ---
        # A reversal is only counted as down-then-up / up-then-down when each
        # step's target center lies beyond the previous box's edge; every other
        # case (including no sign change) is recorded as "その他" (other).
        if dy1 > 0 and dy2 < 0:
            if b2["center_y"] > b1["bottom"] and b3["center_y"] < b2["top"]:
                key = "上下:下-上"
            else:
                key = "上下:その他"
        elif dy1 < 0 and dy2 > 0:
            if b2["center_y"] < b1["top"] and b3["center_y"] > b2["bottom"]:
                key = "上下:上-下"
            else:
                key = "上下:その他"
        else:
            key = "上下:その他"
        backward_counter[key] += 1
        image_backward_stats[image_id][key] += 1

        # --- Horizontal direction ---
        # Same rule as above, applied to x displacements and left/right edges.
        if dx1 > 0 and dx2 < 0:
            if b2["center_x"] > b1["right"] and b3["center_x"] < b2["left"]:
                key = "左右:右-左"
            else:
                key = "左右:その他"
        elif dx1 < 0 and dx2 > 0:
            if b2["center_x"] < b1["left"] and b3["center_x"] > b2["right"]:
                key = "左右:左-右"
            else:
                key = "左右:その他"
        else:
            key = "左右:その他"
        backward_counter[key] += 1
        image_backward_stats[image_id][key] += 1

# ---------- 出力 ----------

def print_summary(title, keys, group_label):
    """Print dataset-wide and per-image statistics for one group of pattern keys.

    Reads the module-level ``backward_counter``, ``total_triplets`` and
    ``image_backward_stats``.

    Args:
        title: Accepted for the caller's convenience; not used by the body.
        keys: Pattern keys belonging to one group. The first three characters
            of each key are dropped when the key is displayed.
        group_label: Text inserted into both section headers.
    """
    print(f"\n=== {group_label} バックワード統計（実位置判定） ===")
    have_triplets = total_triplets > 0
    for key in keys:
        total_count = backward_counter[key]
        if have_triplets:
            ratio = total_count / total_triplets * 100
        else:
            ratio = 0.0
        print(f"  {key[3:]:8s}: {total_count:5d} 回 ({ratio:6.2f}%)")

    # Per-image statistics: mean count is (sum over images) / (number of images).
    print(f"\n--- {group_label} 画像あたりの統計（平均 / 標準偏差） ---")
    for key in keys:
        # (count for this key, total over the whole group) for every image,
        # keeping only images where the group total is positive.
        per_image = [
            (stats[key], sum(stats[k] for k in keys))
            for stats in image_backward_stats.values()
        ]
        kept = [(hits, group_total) for hits, group_total in per_image if group_total > 0]

        counts = np.array([hits for hits, _ in kept])
        totals = np.array([group_total for _, group_total in kept])
        ratios = counts / totals

        count_sum = counts.sum()
        mean_count = count_sum / len(counts)
        std_count = counts.std(ddof=1)

        # The mean ratio is pooled (sum of counts / sum of totals), while the
        # standard deviation is taken over the per-image ratios.
        mean_ratio = count_sum / totals.sum()
        std_ratio = ratios.std(ddof=1)

        print(f"  {key[3:]:8s}: 回数 平均={mean_count:6.2f} / std={std_count:6.2f} | "
              f"割合 平均={mean_ratio*100:6.2f}% / std={std_ratio*100:6.2f}%")

# Pattern keys for each group; print_summary displays them without the
# three-character "上下:" / "左右:" prefix (it slices key[3:]).
ud_keys = ["上下:下-上", "上下:上-下", "上下:その他"]
lr_keys = ["左右:右-左", "左右:左-右", "左右:その他"]

# Print both summaries
print_summary("上下", ud_keys, "上下方向")
print_summary("左右", lr_keys, "左右方向")

# Total number of three-node windows examined.
print(f"\n総トリプレット数: {total_triplets}")

In [None]:
import matplotlib.pyplot as plt
from math import sqrt
import numpy as np
from collections import defaultdict

# --- bbox helpers ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

def bbox_diagonal(bbox):
    """Return the diagonal length of an ``(x, y, w, h)`` box."""
    _, _, width, height = bbox
    squared_length = width**2 + height**2
    return sqrt(squared_length)

# --- Settings ---
# Upper bounds of the distance bins; labels are "(0-1]", "(1-2]", ..., ">16".
thresholds = [1, 2, 4, 8, 16]
bin_labels = [f"(0-{thresholds[0]}]"] + \
             [f"({thresholds[i-1]}-{thresholds[i]}]" for i in range(1, len(thresholds))] + \
             [f">{thresholds[-1]}"]

# --- Result storage ---
# image_id -> list of normalized distances, one per consecutive node pair.
image_id_to_dists = defaultdict(list)

# --- Scan the data (one distance per consecutive pair in dfs_all_nodes order) ---
for image_id, root in image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]
    for i in range(len(nodes) - 1):
        n1, n2 = nodes[i], nodes[i + 1]
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)

        dx = abs(x2 - x1)
        dy = abs(y2 - y1)

        # Scale by the dominant axis: the larger of |dx| / |dy| is divided by
        # the larger of the two boxes' extent along that same axis.
        if dx >= dy:
            scale = max(n1.bbox[2], n2.bbox[2])  # width
            dist = dx
        else:
            scale = max(n1.bbox[3], n2.bbox[3])  # height
            dist = dy

        # Pairs whose scale is not positive are dropped (avoids division by zero).
        if scale > 0:
            normalized = dist / scale
            image_id_to_dists[image_id].append(normalized)

# --- Flatten the normalized distances into one array ---
all_values = [d for dists in image_id_to_dists.values() for d in dists]
values = np.array(all_values)

# --- Basic statistics ---
print("\n=== 読み順における相対距離統計 ===")
print(f"平均倍率     : {values.mean():.3f}")
print(f"標準偏差     : {values.std():.3f}")
print(f"中央値       : {np.median(values):.3f}")
print(f"最大 / 最小 : {values.max():.3f} / {values.min():.3f}")

# --- Per-image occurrence counts for each bin ---
print("\n=== 区間ごとの画像あたり平均出現数 / 標準偏差 ===")
bin_image_counts = {label: [] for label in bin_labels}

# Bin i covers (starts[i], ends[i]]; a distance of exactly 0 falls in no bin.
starts = [0] + thresholds
ends = thresholds + [np.inf]

for dists in image_id_to_dists.values():
    dists = np.array(dists)
    for label, s, e in zip(bin_labels, starts, ends):
        count = ((dists > s) & (dists <= e)).sum()
        bin_image_counts[label].append(count)

for label in bin_labels:
    arr = np.array(bin_image_counts[label])
    print(f"{label:>7} : 平均 = {arr.mean():5.2f} / std = {arr.std():5.2f}")

# --- Dataset-wide count and share for each bin ---
print("\n=== 読み順距離の区間ごとの全体カウントと比率 ===")
start = 0
total = len(values)
for t in thresholds:
    count = ((values > start) & (values <= t)).sum()
    percent = count / total * 100
    print(f"{start:>2} < x ≤ {t:<2} : {count:6d} 組 | 比率: {percent:5.1f}%")
    start = t  # the current upper bound becomes the next lower bound
# Everything above the last threshold goes into the overflow bin.
overflow = (values > thresholds[-1]).sum()
percent = overflow / total * 100
print(f"x > {thresholds[-1]:<2}     : {overflow:6d} 組 | 比率: {percent:5.1f}%")

# --- Draw the per-bin histogram ---
# The last bin is open-ended (upper edge np.inf). Passing an infinite edge to
# plt.hist gives that bar an infinite width and a NaN position, so the ">16"
# bin cannot be drawn. The bins are therefore counted explicitly, using the
# same (lower, upper] convention as the printed table, and shown as one
# equally wide bar per bin.
hist_lowers = [0] + thresholds
hist_uppers = thresholds + [np.inf]
hist_counts = [
    int(((values > lo) & (values <= hi)).sum())
    for lo, hi in zip(hist_lowers, hist_uppers)
]

plt.figure(figsize=(8, 5))
plt.bar(range(len(bin_labels)), hist_counts, tick_label=bin_labels, edgecolor='black')
plt.xlabel("Normalized distance between adjacent nodes in reading order")
plt.ylabel("Count")
plt.title("Histogram of normalized distances (DFS reading order)")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from collections import defaultdict

# Fixed direction order (must match the labels returned by classify_8_directions)
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_idx = {label: i for i, label in enumerate(direction_labels)}

# Distance-bin configuration
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0] + thresholds + [np.inf]  # 7 edges -> 6 bins
num_dirs = len(direction_labels)
num_bins = len(bin_edges) - 1

# Result matrix: 8 directions x 6 distance bins
heatmap = np.zeros((num_dirs, num_bins), dtype=int)

for image_id, root in image_id_to_root.items():
    nodes = [n for n in dfs_all_nodes(root) if n.category != "Root"]
    for i in range(len(nodes) - 1):
        n1, n2 = nodes[i], nodes[i + 1]
        x1, y1 = bbox_center(n1.bbox)
        x2, y2 = bbox_center(n2.bbox)
        dx, dy = x2 - x1, y2 - y1

        # Identical centers have no direction; skip the pair.
        if dx == 0 and dy == 0:
            continue

        # --- Direction ---
        direction = classify_8_directions(dx, dy)
        dir_idx = direction_to_idx[direction]

        # --- Relative distance (normalized along the dominant axis) ---
        if abs(dx) >= abs(dy):
            scale = max(n1.bbox[2], n2.bbox[2])  # width
            dist = abs(dx)
        else:
            scale = max(n1.bbox[3], n2.bbox[3])  # height
            dist = abs(dy)
        if scale == 0:
            continue
        norm_dist = dist / scale

        # --- Distance bin ---
        # right=True makes every bin (lower, upper], i.e.
        # bin_edges[i] < x <= bin_edges[i + 1], matching the "(0, 1]" style
        # labels used for this matrix. The default right=False would put a
        # value that sits exactly on a threshold into the next bin up.
        bin_idx = int(np.digitize(norm_dist, bin_edges, right=True)) - 1
        bin_idx = min(max(bin_idx, 0), num_bins - 1)  # clamp defensively

        # --- Count ---
        heatmap[dir_idx, bin_idx] += 1

In [None]:
# Labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence in a
# normal string literal and triggers a SyntaxWarning on recent Python versions.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Build the LaTeX table (raw counts), one list entry per output line
latex = []
latex.append("\\begin{table}[t!]")
latex.append("\\centering")
latex.append("\\begin{tabular}{l|" + "r" * heatmap.shape[1] + "}")
latex.append("\\toprule")
latex.append("Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\")
latex.append("\\midrule")

# One table row per direction; counts use a thousands separator.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v:,}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex.append("\\bottomrule")
latex.append("\\end{tabular}")
latex.append("\\caption{Reading Order frequency by direction and normalized distance bin.}")
latex.append("\\label{tab:reading_order_direction_distance}")
latex.append("\\end{table}")

# Show the result
print("\n".join(latex))

In [None]:
# Labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence in a
# normal string literal and triggers a SyntaxWarning on recent Python versions.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Build the LaTeX table (counts scaled to "per 1000 images"), one list entry per output line
# NOTE(review): the caption and \label below are identical to those of the
# raw-count table cell; confirm only one of the two tables is used per document.
latex = []
latex.append("\\begin{table}[t!]")
latex.append("\\centering")
latex.append("\\begin{tabular}{l|" + "r" * heatmap.shape[1] + "}")
latex.append("\\toprule")
latex.append("Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\")
latex.append("\\midrule")

# One table row per direction: count / number of images * 1000, two decimals.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v/len(image_id_to_root)*1000:,.2f}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex.append("\\bottomrule")
latex.append("\\end{tabular}")
latex.append("\\caption{Reading Order frequency by direction and normalized distance bin.}")
latex.append("\\label{tab:reading_order_direction_distance}")
latex.append("\\end{table}")

# Show the result
print("\n".join(latex))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------------------
# Input: heatmap (8 directions x 6 distance bins)
# Direction order: ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
# ----------------------------------------
# Provide it e.g. via heatmap = np.array(...)
# log_heatmap = np.log2(heatmap + 1)  # shape: (8, 6)
# Counts are scaled to "per 1000 images" before taking log2(x + 1).
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# ----------------------------------------
# Upsample the data (no interpolation, plain repetition)
# ----------------------------------------
repeat_factor = 17  # each cell is repeated this many times along both axes (adjustable)
Z = log_heatmap.T  # shape: (6, 8) → (rows: distance, cols: direction)
Z_repeat = np.repeat(np.repeat(Z, repeat_factor, axis=0), repeat_factor, axis=1)  # shape: (6 * repeat_factor, 8 * repeat_factor)
# Roll the columns so each direction's block straddles its tick angle
# (shift is -8 for repeat_factor = 17).
Z_repeat = np.roll(Z_repeat, shift=-repeat_factor//2+1, axis=1)

# ----------------------------------------
# Build the polar grid
# ----------------------------------------
# Angular edges are offset by half a column so column 0 is centered on theta = 0.
theta_edges = np.linspace(0, 2 * np.pi, Z_repeat.shape[1] + 1) - (np.pi / Z_repeat.shape[1])
r_edges = np.linspace(0, 6, Z_repeat.shape[0] + 1)  # max bin index = 6
# r_edges = np.logspace(1e-10, np.log2(2**6), Z_repeat.shape[0] + 1, base=2)

Theta, R = np.meshgrid(theta_edges, r_edges)

# ----------------------------------------
# Drawing
# ----------------------------------------
fig, ax = plt.subplots(subplot_kw=dict(polar=True), figsize=(8, 8))
ax.set_theta_direction(-1)  # draw clockwise
ax.set_theta_offset(0)      # keep θ=0 at the right (3 o'clock)

# Color scale is fixed to [0, 13] in log2 units.
pcm = ax.pcolormesh(Theta, R, Z_repeat, cmap='Blues', shading='auto', vmin=0, vmax=13)

# ----------------------------------------
# Direction labels plus per-image value
# ----------------------------------------
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]

# Per-direction value: sum over the distance bins divided by the number of images.
dir_means_real = heatmap.sum(axis=1) / float(len(image_id_to_root))

# Append that value to each label (two decimals)
direction_labels_with_mean = [
    f"{label}\n{mean:.2f}" for label, mean in zip(direction_labels, dir_means_real)
]

tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax.set_xticks(tick_angles)
ax.set_xticklabels(direction_labels_with_mean, fontsize=28)

# Radial axis (distance-bin boundaries)
# Ticks sit at the integer bin boundaries and are labelled with the thresholds.
ax.set_yticks(np.arange(0, 7))
tick_texts = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
tick_texts[-1].set_fontsize(36)

# ----------------------------------------
# Colorbar
# ----------------------------------------
cbar = plt.colorbar(pcm, ax=ax, pad=0.12, shrink=0.7)
cbar.ax.tick_params(labelsize=28)
# cbar.set_label("log₂(count + 1)", fontsize=12)

# ----------------------------------------
# Show and save
# ----------------------------------------
ax.grid(False)
plt.tight_layout()
plt.savefig("./figures/reading_order_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/reading_order_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# --- bbox center ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    half_width = width / 2
    half_height = height / 2
    return left + half_width, top + half_height

# --- Direction categories (diagonal quadrants) ---
direction_types = ["左上", "右上", "左下", "右下"]

# --- Aggregation ---
parent_child_direction_counter = Counter()
image_parent_child_direction_stats = defaultdict(Counter)

# Count dataset-wide and per image in a single pass
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = x2 - x1
            dy = y2 - y1

            # Only NaN displacements are genuinely unclassifiable.
            if dx != dx or dy != dy:
                raise ValueError(f"分類不能: dx={dx}, dy={dy}")

            # A zero displacement on an axis counts as right / bottom, which is
            # the convention the original `dx >= 0 and dy >= 0` branch already
            # used. Deciding each axis independently also covers the
            # axis-aligned cases (dx == 0 with dy < 0, dx < 0 with dy == 0)
            # that previously fell through to the ValueError.
            horizontal = "右" if dx >= 0 else "左"
            vertical = "下" if dy >= 0 else "上"
            direction = horizontal + vertical

            parent_child_direction_counter[direction] += 1
            image_parent_child_direction_stats[image_id][direction] += 1

# --- Build the per-image DataFrame ---
records = []
for image_id in sorted(image_parent_child_direction_stats.keys()):
    row = {"image_id": image_id}
    for direction in direction_types:
        row[direction] = image_parent_child_direction_stats[image_id][direction]
    records.append(row)

df_parent_child_dir = pd.DataFrame(records).set_index("image_id")

# --- Report ---
total_all = sum(parent_child_direction_counter.values())
num_images = len(df_parent_child_dir)

print("=== 親子関係における視線移動方向の統計 ===")
for direction in direction_types:
    total_count = parent_child_direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    # Per-image mean and population standard deviation (ddof=0) of the count
    counts = df_parent_child_dir[direction].values
    mean_count = counts.mean()
    std_count = counts.std(ddof=0)

    print(f"{direction:4s}: "
          f"Total={total_count:6d} 回 ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")

In [None]:
from collections import defaultdict, Counter
from math import atan2, degrees
import pandas as pd

# --- bbox center ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

# --- Direction classification (90-degree sectors) ---
def classify_4quadrants(dx, dy):
    """Map a displacement to one of four 90-degree sectors.

    The angle of ``(dx, dy)`` is normalized to ``[0, 360)`` degrees. Sectors
    are centered on 0 ("右"), 90 ("下"), 180 ("左") and 270 ("上") degrees, so
    a positive ``dy`` maps to "下". "不明" is returned only when no sector
    matches (e.g. a NaN angle).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360

    # Exclusive upper bound of each sector, in increasing order.
    for upper_bound, label in ((45, "右"), (135, "下"), (225, "左"), (315, "上")):
        if angle < upper_bound:
            return label

    # Remaining angles wrap around into the "右" sector.
    if angle >= 315:
        return "右"
    return "不明"

# --- Direction categories (fixed order) ---
direction_types = ["右", "下", "左", "上"]

# --- Aggregation ---
parent_child_direction_counter = Counter()
image_parent_child_direction_stats = defaultdict(Counter)

# Count dataset-wide and per image in a single pass
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = x2 - x1
            dy = y2 - y1

            # Coincident centers have no direction. Without this guard
            # atan2(0, 0) == 0 would silently count the pair as "右"; the
            # reading-order and heatmap cells skip such pairs as well.
            if dx == 0 and dy == 0:
                continue

            direction = classify_4quadrants(dx, dy)
            if direction == "不明":
                continue  # ignored; not expected to occur in practice

            parent_child_direction_counter[direction] += 1
            image_parent_child_direction_stats[image_id][direction] += 1

# --- Build the per-image DataFrame ---
records = []
for image_id in sorted(image_parent_child_direction_stats.keys()):
    row = {"image_id": image_id}
    for direction in direction_types:
        row[direction] = image_parent_child_direction_stats[image_id][direction]
    records.append(row)

df_parent_child_dir = pd.DataFrame(records).set_index("image_id")

# --- Report ---
total_all = sum(parent_child_direction_counter.values())
num_images = len(df_parent_child_dir)

print("=== 親子関係における上下左右の視線移動方向の統計 ===")
for direction in direction_types:
    total_count = parent_child_direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    # Per-image mean and population standard deviation (ddof=0) of the count
    counts = df_parent_child_dir[direction].values
    mean_count = counts.mean()
    std_count = counts.std(ddof=0)

    print(f"{direction:2s}: "
          f"Total={total_count:6d} 回 ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
from collections import defaultdict, Counter
from math import atan2, degrees
import pandas as pd

# --- bbox center ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    half_width = width / 2
    half_height = height / 2
    return left + half_width, top + half_height

# --- Direction classification (8 directions) ---
def classify_8_directions(dx, dy):
    """Map a displacement to one of eight 45-degree sectors.

    The angle of ``(dx, dy)`` is normalized to ``[0, 360)`` degrees. Sectors
    are centered on multiples of 45 degrees, starting with "右" at 0 and
    continuing through "右下", "下", ... so a positive ``dy`` maps to the
    "下" side. "不明" is returned only when the angle lies outside
    ``[0, 360)`` (e.g. NaN).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not 0 <= angle < 360:
        return "不明"

    # Exclusive upper bound of each sector, in increasing order.
    sector_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper_bound, label in sector_bounds:
        if angle < upper_bound:
            return label

    # 337.5 <= angle < 360 wraps around into the "右" sector.
    return "右"

# --- Direction categories (fixed order) ---
direction_types = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]

# --- Aggregation containers ---
parent_child_direction_counter = Counter()
image_parent_child_direction_stats = defaultdict(Counter)

# --- Count dataset-wide and per image ---
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = x2 - x1
            dy = y2 - y1

            # Coincident centers have no direction. Without this guard
            # atan2(0, 0) == 0 would silently count the pair as "右"; the
            # reading-order and heatmap cells skip such pairs as well.
            if dx == 0 and dy == 0:
                continue

            direction = classify_8_directions(dx, dy)
            if direction == "不明":
                continue

            parent_child_direction_counter[direction] += 1
            image_parent_child_direction_stats[image_id][direction] += 1

# --- Build the per-image DataFrame ---
records = []
for image_id in sorted(image_parent_child_direction_stats.keys()):
    row = {"image_id": image_id}
    for direction in direction_types:
        row[direction] = image_parent_child_direction_stats[image_id][direction]
    records.append(row)

df_parent_child_dir = pd.DataFrame(records).set_index("image_id")

# --- Report ---
total_all = sum(parent_child_direction_counter.values())
print("=== 親子関係における視線移動方向（8方向）の統計 ===")
for direction in direction_types:
    total_count = parent_child_direction_counter[direction]
    total_ratio = total_count / total_all * 100 if total_all > 0 else 0.0

    # Per-image mean and population standard deviation (ddof=0) of the count
    counts = df_parent_child_dir[direction].values
    mean_count = counts.mean()
    std_count = counts.std(ddof=0)

    print(f"{direction:4s}: "
          f"Total={total_count:6d} 回 ({total_ratio:5.2f}%) | "
          f"画像あたり平均={mean_count:5.2f} / std={std_count:5.2f}")


In [None]:
import matplotlib.pyplot as plt
from math import sqrt
import numpy as np
from collections import defaultdict

# --- bbox helper ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    center_x = left + width / 2
    center_y = top + height / 2
    return center_x, center_y

def bbox_diagonal(bbox):
    """Return the diagonal length of an ``(x, y, w, h)`` box."""
    _, _, width, height = bbox
    squared_length = width**2 + height**2
    return sqrt(squared_length)

# --- Settings ---
# Upper bounds of the distance bins; labels are "(0-1]", "(1-2]", ..., ">16".
thresholds = [1, 2, 4, 8, 16]
bin_labels = [f"(0-{thresholds[0]}]"] + \
             [f"({thresholds[i-1]}-{thresholds[i]}]" for i in range(1, len(thresholds))] + \
             [f">{thresholds[-1]}"]

# --- Data collection ---
# normalized_distances holds every pair; image_id_to_dists groups them per image.
normalized_distances = []
image_id_to_dists = defaultdict(list)

for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx = abs(x2 - x1)
            dy = abs(y2 - y1)

            # Scale by the dominant axis: the larger of |dx| / |dy| is divided
            # by the larger of the two boxes' extent along that same axis.
            if dx >= dy:
                scale = max(parent.bbox[2], child.bbox[2])  # bbox width
                dist = dx
            else:
                scale = max(parent.bbox[3], child.bbox[3])  # bbox height
                dist = dy

            # Pairs whose scale is not positive are dropped (avoids division by zero).
            if scale > 0:
                normalized = dist / scale
                normalized_distances.append(normalized)
                image_id_to_dists[image_id].append(normalized)

# --- Dataset-wide statistics ---
values = np.array(normalized_distances)
print("\n=== 親子関係における相対距離統計 ===")
print(f"平均倍率     : {values.mean():.3f}")
print(f"標準偏差     : {values.std():.3f}")
print(f"中央値       : {np.median(values):.3f}")
print(f"最大 / 最小 : {values.max():.3f} / {values.min():.3f}")

# --- Dataset-wide count and share for each bin ---
print("\n=== 親子距離の区間ごとのカウントと比率 ===")
start = 0  # not read again in this cell
total = len(values)
# Bin i covers (starts[i], ends[i]]; a distance of exactly 0 falls in no bin.
starts = [0] + thresholds
ends = thresholds + [np.inf]
for s, e in zip(starts, ends):
    count = ((values > s) & (values <= e)).sum()
    percent = count / total * 100
    label = f"{s} < x ≤ {e}" if e != np.inf else f"x > {s}"
    print(f"{label:<13} : {count:6d} 組 | 比率: {percent:5.1f}%")

# --- Per-image mean / standard deviation of occurrences for each bin ---
print("\n=== 区間ごとの画像あたり平均出現数 / 標準偏差 ===")
bin_image_counts = {label: [] for label in bin_labels}

for dists in image_id_to_dists.values():
    dists = np.array(dists)
    for label, s, e in zip(bin_labels, starts, ends):
        count = ((dists > s) & (dists <= e)).sum()
        bin_image_counts[label].append(count)

for label in bin_labels:
    arr = np.array(bin_image_counts[label])
    print(f"{label:>7} : 平均 = {arr.mean():5.2f} / std = {arr.std():5.2f}")

# --- Draw the per-bin histogram ---
# The last bin is open-ended (upper edge np.inf). Passing an infinite edge to
# plt.hist gives that bar an infinite width and a NaN position, so the ">16"
# bin cannot be drawn. The bins are therefore counted explicitly, using the
# same (lower, upper] convention as the printed table, and shown as one
# equally wide bar per bin.
hist_lowers = [0] + thresholds
hist_uppers = thresholds + [np.inf]
hist_counts = [
    int(((values > lo) & (values <= hi)).sum())
    for lo, hi in zip(hist_lowers, hist_uppers)
]

plt.figure(figsize=(8, 5))
plt.bar(range(len(bin_labels)), hist_counts, tick_label=bin_labels, edgecolor='black')
plt.xlabel("Normalized parent-child distance (× bbox size)")
plt.ylabel("Count")
plt.title("Histogram of normalized distances between parent and child nodes")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from collections import defaultdict, Counter
from math import atan2, degrees, sqrt
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable

# --- bbox center ---
def bbox_center(bbox):
    """Return the ``(center_x, center_y)`` of an ``(x, y, w, h)`` box."""
    left, top, width, height = bbox
    half_width = width / 2
    half_height = height / 2
    return left + half_width, top + half_height

# --- Direction classification (8 directions) ---
def classify_8_directions(dx, dy):
    """Map a displacement to one of eight 45-degree sectors.

    The angle of ``(dx, dy)`` is normalized to ``[0, 360)`` degrees. Sectors
    are centered on multiples of 45 degrees, starting with "右" at 0 and
    continuing through "右下", "下", ... so a positive ``dy`` maps to the
    "下" side. "不明" is returned only when the angle lies outside
    ``[0, 360)`` (e.g. NaN).
    """
    angle = (degrees(atan2(dy, dx)) + 360) % 360
    if not 0 <= angle < 360:
        return "不明"

    # Exclusive upper bound of each sector, in increasing order.
    sector_bounds = (
        (22.5, "右"),
        (67.5, "右下"),
        (112.5, "下"),
        (157.5, "左下"),
        (202.5, "左"),
        (247.5, "左上"),
        (292.5, "上"),
        (337.5, "右上"),
    )
    for upper_bound, label in sector_bounds:
        if angle < upper_bound:
            return label

    # 337.5 <= angle < 360 wraps around into the "右" sector.
    return "右"

# --- Definitions ---
direction_labels = ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
direction_to_index = {d: i for i, d in enumerate(direction_labels)}
thresholds = [1, 2, 4, 8, 16]
bin_edges = [0] + thresholds + [float('inf')]
bin_count = len(bin_edges) - 1

# --- Count matrix (direction x distance bin) ---
heatmap = np.zeros((8, bin_count), dtype=int)

# --- Count every parent -> child relation ---
for image_id, root in image_id_to_root.items():
    nodes = dfs_all_nodes(root)
    for parent in nodes:
        if parent.category == "Root":
            continue
        for child in parent.children:
            if child.category == "Root":
                continue

            x1, y1 = bbox_center(parent.bbox)
            x2, y2 = bbox_center(child.bbox)
            dx, dy = x2 - x1, y2 - y1
            # Coincident centers have no direction; skip the pair.
            if dx == 0 and dy == 0:
                continue

            direction = classify_8_directions(dx, dy)
            if direction == "不明":
                continue
            dir_idx = direction_to_index[direction]

            # Normalized distance along the dominant axis
            dx_abs, dy_abs = abs(dx), abs(dy)
            if dx_abs >= dy_abs:
                scale = max(parent.bbox[2], child.bbox[2])  # width
                dist = dx_abs
            else:
                scale = max(parent.bbox[3], child.bbox[3])  # height
                dist = dy_abs

            if scale == 0:
                continue
            norm_dist = dist / scale

            # Assign to a bin.
            # right=True makes every bin (lower, upper], i.e.
            # bin_edges[i] < x <= bin_edges[i + 1], matching the "(0, 1]" style
            # labels used for this matrix. The default right=False would put a
            # value that sits exactly on a threshold into the next bin up.
            bin_idx = int(np.digitize(norm_dist, bin_edges, right=True)) - 1
            bin_idx = min(max(bin_idx, 0), bin_count - 1)  # clamp defensively
            heatmap[dir_idx, bin_idx] += 1

In [None]:
# Labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence in a
# normal string literal and triggers a SyntaxWarning on recent Python versions.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Build the LaTeX table (raw counts), one list entry per output line
latex = []
latex.append("\\begin{table}[t!]")
latex.append("\\centering")
latex.append("\\begin{tabular}{l" + "r" * heatmap.shape[1] + "}")
latex.append("\\toprule")
latex.append("Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\")
latex.append("\\midrule")

# One table row per direction; counts use a thousands separator.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v:,}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex.append("\\bottomrule")
latex.append("\\end{tabular}")
latex.append("\\caption{Parent-child frequency by direction and normalized distance bin.}")
latex.append("\\label{tab:parent_child_direction_distance}")
latex.append("\\end{table}")

# Show the result
print("\n".join(latex))

In [None]:
# Labels
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]
# Raw string for the last label: "\i" is not a valid escape sequence in a
# normal string literal and triggers a SyntaxWarning on recent Python versions.
bin_labels = ["(0, 1]", "(1, 2]", "(2, 4]", "(4, 8]", "(8, 16]", r"(16, $\infty$)"]

# Build the LaTeX table (counts scaled to "per 1000 images"), one list entry per output line
# NOTE(review): the caption and \label below are identical to those of the
# raw-count table cell; confirm only one of the two tables is used per document.
latex = []
latex.append("\\begin{table}[t!]")
latex.append("\\centering")
latex.append("\\begin{tabular}{l|" + "r" * heatmap.shape[1] + "}")
latex.append("\\toprule")
latex.append("Direction \\textbackslash{} Distance & " + " & ".join(bin_labels) + " \\\\")
latex.append("\\midrule")

# One table row per direction: count / number of images * 1000, two decimals.
for label, row in zip(direction_labels, heatmap):
    row_str = " & ".join(f"{v/len(image_id_to_root)*1000:,.2f}" for v in row)
    latex.append(f"{label:<13} & {row_str} \\\\")

latex.append("\\bottomrule")
latex.append("\\end{tabular}")
latex.append("\\caption{Parent-child frequency by direction and normalized distance bin.}")
latex.append("\\label{tab:parent_child_direction_distance}")
latex.append("\\end{table}")

# Show the result
print("\n".join(latex))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------------------
# Input: heatmap (8 directions x 6 distance bins)
# Direction order: ["右", "右下", "下", "左下", "左", "左上", "上", "右上"]
# ----------------------------------------
# Provide it e.g. via heatmap = np.array(...)
# log_heatmap = np.log2(heatmap + 1)  # shape: (8, 6)
# Counts are scaled to "per 1000 images" before taking log2(x + 1).
log_heatmap = np.log2(heatmap / float(len(image_id_to_root)) * 1000 + 1)

# ----------------------------------------
# Upsample the data (no interpolation, plain repetition)
# ----------------------------------------
repeat_factor = 17  # each cell is repeated this many times along both axes (adjustable)
Z = log_heatmap.T  # shape: (6, 8) → (rows: distance, cols: direction)
Z_repeat = np.repeat(np.repeat(Z, repeat_factor, axis=0), repeat_factor, axis=1)  # shape: (6 * repeat_factor, 8 * repeat_factor)
# Roll the columns so each direction's block straddles its tick angle
# (shift is -8 for repeat_factor = 17).
Z_repeat = np.roll(Z_repeat, shift=-repeat_factor//2+1, axis=1)

# ----------------------------------------
# Build the polar grid
# ----------------------------------------
# Angular edges are offset by half a column so column 0 is centered on theta = 0.
theta_edges = np.linspace(0, 2 * np.pi, Z_repeat.shape[1] + 1) - (np.pi / Z_repeat.shape[1])
r_edges = np.linspace(0, 6, Z_repeat.shape[0] + 1)  # max bin index = 6
# r_edges = np.logspace(1e-10, np.log2(2**6), Z_repeat.shape[0] + 1, base=2)

Theta, R = np.meshgrid(theta_edges, r_edges)

# ----------------------------------------
# Drawing
# ----------------------------------------
fig, ax = plt.subplots(subplot_kw=dict(polar=True), figsize=(8, 8))
ax.set_theta_direction(-1)  # draw clockwise
ax.set_theta_offset(0)      # keep θ=0 at the right (3 o'clock)

# Color scale is fixed to [0, 12] in log2 units.
pcm = ax.pcolormesh(Theta, R, Z_repeat, cmap='Blues', shading='auto', vmin=0, vmax=12)

# ----------------------------------------
# Direction labels plus per-image value
# ----------------------------------------
direction_labels = [
    "Right", "Bottom-Right", "Bottom", "Bottom-Left",
    "Left", "Top-Left", "Top", "Top-Right"
]

# Per-direction value: sum over the distance bins divided by the number of images.
dir_means_real = heatmap.sum(axis=1) / float(len(image_id_to_root))

# Append that value to each label (two decimals)
direction_labels_with_mean = [
    f"{label}\n{mean:.2f}" for label, mean in zip(direction_labels, dir_means_real)
]

tick_angles = np.linspace(0, 2 * np.pi, 8, endpoint=False)
ax.set_xticks(tick_angles)
ax.set_xticklabels(direction_labels_with_mean, fontsize=28)

# Radial axis (distance-bin boundaries)
# Ticks sit at the integer bin boundaries and are labelled with the thresholds.
ax.set_yticks(np.arange(0, 7))
tick_texts = ax.set_yticklabels(["0", "1", "2", "4", "8", "16", r"$\infty$"], fontsize=28)
tick_texts[-1].set_fontsize(36)

# ----------------------------------------
# Colorbar
# ----------------------------------------
cbar = plt.colorbar(pcm, ax=ax, pad=0.12, shrink=0.7)
cbar.ax.tick_params(labelsize=28)
# cbar.set_label("log₂(count + 1)", fontsize=12)

# ----------------------------------------
# Show and save
# ----------------------------------------
ax.grid(False)
plt.tight_layout()
plt.savefig("./figures/parent_child_rose_log2.png", dpi=300, bbox_inches='tight')
plt.savefig("./figures/parent_child_rose_log2.eps", format="eps", dpi=300, bbox_inches='tight')
plt.show()
