In [4]:
#!/usr/bin/env python
# rq1_stats_table_rr_br.py  –  RR / BR  の Wilcoxon＋Holm＋Cliff’s Δ

import os, json, math, itertools, warnings
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests

# ────── 固定パラメータ ──────────────────────────────────────────
DATASETS   = ["c100", "tiny-imagenet"]
K          = 0
TGT_RANKS  = [1, 2, 3]
MISCLF_TPS = ["src_tgt", "tgt_fp", "tgt_fn"]
REPS       = range(5)
WNUM       = 472

ALPHA      = 10/11
ALPHA_STR  = f"alpha{ALPHA}_boundsArachne"
ROOT_TMPL  = "/src/src/out_vit_{ds}_fold{K}"

METHODS = {"reptran": "ours", "arachne": "bl", "random_r": "random_r", "random_a": "random_a"}
PAIRS = list(itertools.combinations(METHODS.keys(), 2))

METRIC_INFO = dict(
    T=("tot_time",  "Total Time"),
)

In [5]:
# ────── JSON 読み出し ───────────────────────────────────────────
def metric_value(ds, mtype, rank, rep, method_key, json_key):
    base = Path(ROOT_TMPL.format(ds=ds, K=K))
    jdir = base / f"misclf_top{rank}" / f"{mtype}_repair_weight_by_de"
    if method_key == "reptran":
        fn = f"exp-repair-4-1-metrics_for_repair_n{WNUM}_{ALPHA_STR}_ours_reps{rep}.json"
    elif method_key == "random_r":
        fn = f"exp-repair-4-1-metrics_for_repair_n{WNUM}_{ALPHA_STR}_random_reps{rep}.json"
    elif method_key == "random_a":
        fn = f"exp-repair-3-2-metrics_for_repair_{ALPHA_STR}_random_reps{rep}.json"
    elif method_key == "arachne":
        fn = f"exp-repair-3-1-metrics_for_repair_{ALPHA_STR}_bl_reps{rep}.json"
    with open(jdir / fn) as f:
        return json.load(f)[json_key]

# ────── Wilcoxon & Cliff’s Δ (対応あり) ──────────────────────────
def paired_cliffs_delta(v1: np.ndarray, v2: np.ndarray):
    """対応あり Cliff’s Δ  =  (n_pos - n_neg) / N"""
    diff = v1 - v2
    n_pos = np.sum(diff > 0)
    n_neg = np.sum(diff < 0)
    return (n_pos - n_neg) / diff.size if diff.size else 0.0

def make_tag(m1, m2):
    """例: make_tag('reptran', 'random_a') -> 'R vs. Rand_A'"""
    # タグ名変換辞書
    TAG_MAP = {
        "reptran": "R",
        "arachne": "A",
        "random_r": "Rand_R",
        "random_a": "Rand_A",
    }
    return f"{TAG_MAP[m1]} vs. {TAG_MAP[m2]}"

def wilcoxon_block(values):
    """values = {method: np.array(15)}   ->   {OvA_p, OvA_d, …}"""
    out = {}
    p_raw = []
    # 生 p と Δ をまず計算
    for m1, m2 in PAIRS:
        v1, v2 = values[m1], values[m2]
        if np.allclose(v1, v2):
            p = 1.0
        else:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=UserWarning)
                p = wilcoxon(v1, v2, zero_method="wilcox").pvalue
        d = paired_cliffs_delta(v1, v2)
        tag = make_tag(m1, m2)
        out[f"{tag}_p_raw"] = p
        out[f"{tag}_d"]     = d
        p_raw.append(p)

    # Holm 補正
    _, p_adj, _, _ = multipletests(p_raw, method="holm")
    for (m1, m2), p_c in zip(PAIRS, p_adj):
        tag = make_tag(m1, m2)
        out[f"{tag}_p"] = p_c
    return out

def star(p):
    return "***" if p <= .001 else "**" if p <= .01 else "*" if p <= .05 else ""

def cell(d, p):
    return f"'{d:+.2f} {star(p)}"      # +0.45 ** のように符号を残す

In [6]:
# ────── main ────────────────────────────────────────────────────
for metric_tag, (json_key, nice_name) in METRIC_INFO.items():
    rows = []
    for ds, mtype in itertools.product(DATASETS, MISCLF_TPS):
        # 15 データ点 × 3 手法
        vals = {m: [] for m in METHODS}
        for rank, rep in itertools.product(TGT_RANKS, REPS):
            for m, _ in METHODS.items():
                vals[m].append(
                    metric_value(ds, mtype, rank, rep, m, json_key)
                )
        vals = {m: np.array(v) for m, v in vals.items()}

        stat = wilcoxon_block(vals)
        print(stat.keys())
        # tag の一覧を得る（順序を固定）
        comparison_tags = [make_tag(m1, m2) for m1, m2 in PAIRS]

        row = {
            "dataset": ds,
            "misclf_type": mtype,
        }
        for tag in comparison_tags:
            row[tag] = cell(stat[f"{tag}_d"], stat[f"{tag}_p"])
        rows.append(row)

    # 並べ替え & 保存
    order = dict(dataset=DATASETS,
                 misclf_type=MISCLF_TPS)
    df = pd.DataFrame(rows).sort_values(
        ["dataset", "misclf_type"],
        key=lambda s: s.map({v: i for col in ["dataset","misclf_type"]
                                   for i,v in enumerate(order[col])})
    )
    out_csv = f"exp-repair-4-2-3_wilcoxon_cliffs_{metric_tag}.csv"
    df.to_csv(out_csv, index=False)
    print(f"[✓] {nice_name}  →  {out_csv}")


dict_keys(['R vs. A_p_raw', 'R vs. A_d', 'R vs. Rand_R_p_raw', 'R vs. Rand_R_d', 'R vs. Rand_A_p_raw', 'R vs. Rand_A_d', 'A vs. Rand_R_p_raw', 'A vs. Rand_R_d', 'A vs. Rand_A_p_raw', 'A vs. Rand_A_d', 'Rand_R vs. Rand_A_p_raw', 'Rand_R vs. Rand_A_d', 'R vs. A_p', 'R vs. Rand_R_p', 'R vs. Rand_A_p', 'A vs. Rand_R_p', 'A vs. Rand_A_p', 'Rand_R vs. Rand_A_p'])
dict_keys(['R vs. A_p_raw', 'R vs. A_d', 'R vs. Rand_R_p_raw', 'R vs. Rand_R_d', 'R vs. Rand_A_p_raw', 'R vs. Rand_A_d', 'A vs. Rand_R_p_raw', 'A vs. Rand_R_d', 'A vs. Rand_A_p_raw', 'A vs. Rand_A_d', 'Rand_R vs. Rand_A_p_raw', 'Rand_R vs. Rand_A_d', 'R vs. A_p', 'R vs. Rand_R_p', 'R vs. Rand_A_p', 'A vs. Rand_R_p', 'A vs. Rand_A_p', 'Rand_R vs. Rand_A_p'])
dict_keys(['R vs. A_p_raw', 'R vs. A_d', 'R vs. Rand_R_p_raw', 'R vs. Rand_R_d', 'R vs. Rand_A_p_raw', 'R vs. Rand_A_d', 'A vs. Rand_R_p_raw', 'A vs. Rand_R_d', 'A vs. Rand_A_p_raw', 'A vs. Rand_A_d', 'Rand_R vs. Rand_A_p_raw', 'Rand_R vs. Rand_A_d', 'R vs. A_p', 'R vs. Rand_R_p'