In [6]:
import os, json, hashlib
from pathlib import Path
from typing import Dict, Tuple, Optional
import numpy as np
import cv2
import pandas as pd

In [None]:
import os
import shutil

def copy_dir_contents(src: str, dst: str) -> bool:
    """Merge everything inside ``src`` into ``dst``.

    Args:
        src: Directory whose files and sub-folders are copied.
        dst: Target directory; created if it does not exist yet.

    Returns:
        True when the copy was performed, False when ``src`` is not an
        existing directory (in which case ``dst`` is left untouched and is
        not created).
    """
    # Check the source first so a missing input never leaves an empty target behind.
    if not os.path.isdir(src):
        return False
    # copytree creates dst when needed, recurses into sub-folders and copies
    # files with copy2 (metadata preserved); dirs_exist_ok=True lets it merge
    # into a destination that already exists.
    shutil.copytree(src, dst, dirs_exist_ok=True)
    return True


splits = ['train','test','val']
datas = ["real", "adm"]

# Alternative selection:
# splits = ['test']
# datas = ["pndm", "ddpm"]

for split in splits:
    for data in datas:
        # Source folder A and target folder B.
        # Alternative source/target pair:
        # src = f"/root/autodl-tmp/data/history/adaptive/diff_abs_no/{split}/lsun_adm/{data}/metrics_vis/DiffRecon"
        # dst = f"/root/autodl-tmp/data/metrics/adaptive/diff_abs_no/{split}/lsun_adm/{data}"

        src = f"/root/autodl-tmp/data/history/dire_adaptive/dire_vis/abs/{split}/lsun_adm/{data}"
        dst = f"/root/autodl-tmp/data/metrics/adaptive/dire_abs/{split}/lsun_adm/{data}"

        if copy_dir_contents(src, dst):
            print(f"已将 {src} 中的所有内容复制到 {dst}/ 下。")
        else:
            # Report and keep going so one missing split/data pair does not abort the rest.
            print(f"[skip] source directory not found: {src}")

已将 /root/autodl-tmp/data/history/dire_adaptive/dire_vis/abs/train/lsun_adm/real 中的所有内容复制到 /root/autodl-tmp/data/metrics/adaptive/dire_abs/train/lsun_adm/real/ 下。


In [22]:
# =========================
# 0) CONFIG -- edit these lines
# =========================
COMPUTE_DIRE_DIFF_ROOT = "/root/autodl-tmp/data/our_abs_no/diffrecon"      # old pipeline: DiffRecon PNGs
RECON_METRICS_DIFF_ROOT = "/root/autodl-tmp/data/metrics/fixed/diff_abs_no"  # new pipeline: DiffRecon PNGs

COMPUTE_DIRE_DIRE_ROOT = "/root/autodl-tmp/data/our_abs_no/dire"           # old pipeline: Dire PNGs (may be set to None)
RECON_METRICS_DIRE_ROOT = "/root/autodl-tmp/data/history/dire_fixed/dire_vis/abs"               # new pipeline: Dire PNGs (may be set to None)

REPORT_DIR = "/root/autodl-tmp/compare/1-3/fixed"   # output directory for the CSV reports
os.makedirs(REPORT_DIR, exist_ok=True)

In [23]:
# ==============
# 1) 小工具函数
# ==============
def list_pngs(root: Optional[str]) -> Dict[str, str]:
    """Map every ``*.png`` entry below ``root`` to its absolute path.

    Keys are POSIX-style paths relative to the resolved ``root``; values are
    the absolute paths as strings. An empty dict is returned when ``root`` is
    ``None`` or is not an existing directory.
    """
    if root is not None and os.path.isdir(root):
        base = Path(root).resolve()
        return {
            png.relative_to(base).as_posix(): str(png)
            for png in base.rglob("*.png")
        }
    return {}

def list_images_ignore_ext(root: Optional[str]) -> Dict[str, str]:
    """Index the image files below ``root`` by their extension-less relative path.

    Scans recursively for files ending in png/jpg/jpeg/webp/bmp (case
    insensitive) so that two trees can be aligned even when the same image was
    saved with different extensions.

    Args:
        root: Directory to scan; ``None`` or a non-existent directory yields ``{}``.

    Returns:
        ``{relative POSIX path without its final suffix: absolute path}``.
        Only regular files are returned; a directory whose name happens to end
        in an image extension is ignored. When several files share the same
        extension-less path, the one whose extension comes first in
        (png, jpg, jpeg, webp, bmp) is kept, so the result does not depend on
        filesystem enumeration order.
    """
    if root is None or not os.path.isdir(root):
        return {}
    root_path = Path(root).resolve()
    exts = (".png", ".jpg", ".jpeg", ".webp", ".bmp")
    priority = {ext: rank for rank, ext in enumerate(exts)}
    out: Dict[str, str] = {}
    chosen_rank: Dict[str, int] = {}
    # Sorted traversal keeps tie-breaking (same extension rank) deterministic.
    for p in sorted(root_path.rglob("*")):
        rank = priority.get(p.suffix.lower())
        if rank is None or not p.is_file():
            continue
        rel_noext = p.relative_to(root_path).with_suffix("").as_posix()
        if rel_noext not in chosen_rank or rank < chosen_rank[rel_noext]:
            chosen_rank[rel_noext] = rank
            out[rel_noext] = str(p)
    return out

def read_png_bgr(path: str) -> np.ndarray:
    """Load the image at ``path`` through ``cv2.imread`` with ``IMREAD_UNCHANGED``.

    Returns:
        The array produced by OpenCV, exactly as decoded.

    Raises:
        FileNotFoundError: carrying ``path`` when ``cv2.imread`` returns ``None``.
    """
    loaded = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if loaded is not None:
        return loaded
    raise FileNotFoundError(path)

def compare_images(a: np.ndarray, b: np.ndarray) -> Dict[str, float]:
    """Summarise the element-wise absolute difference of two images.

    Both arrays are handed to ``cv2.absdiff``; callers are expected to pass
    images of identical shape (the docstring contract is uint8 data).

    Returns:
        ``{"L_inf": largest absolute difference, "L1_mean": mean absolute
        difference}``, both as Python floats.
    """
    abs_delta = cv2.absdiff(a, b)
    worst = float(abs_delta.max())
    average = float(abs_delta.mean())
    return {"L_inf": worst, "L1_mean": average}

def maybe_channel_swap_equal(a: np.ndarray, b: np.ndarray) -> Tuple[bool, Dict[str, float]]:
    """Check whether ``a`` equals ``b`` once ``b``'s channel axis is reversed (BGR<->RGB).

    Returns:
        ``(equal, metrics)``. For two 3-channel images, ``equal`` tells whether
        ``a`` matches the channel-reversed ``b`` exactly, and ``metrics`` holds
        the ``L_inf`` / ``L1_mean`` error after the swap (zeros on an exact
        match). For any other input the result is ``False`` with both metrics
        set to infinity.
    """
    def _three_channel(img: np.ndarray) -> bool:
        return img.ndim == 3 and img.shape[2] == 3

    if not (_three_channel(a) and _three_channel(b)):
        return False, {"L_inf": np.inf, "L1_mean": np.inf}

    b_reversed = b[:, :, ::-1]
    if np.array_equal(a, b_reversed):
        return True, {"L_inf": 0.0, "L1_mean": 0.0}
    return False, compare_images(a, b_reversed)

def md5sum(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in 1 MiB chunks so large files are not loaded at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(1 << 20)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

In [24]:
# ===================
# 2) 配对并比较一个集合
# ===================
def compare_trees(ref_root: Optional[str], new_root: Optional[str], label: str) -> pd.DataFrame:
    """Pair the images of two directory trees by relative path and compare them.

    Images are matched on their relative path with the extension removed (see
    ``list_images_ignore_ext``). One row is produced for every path found in
    either tree.

    Args:
        ref_root: Root of the reference tree (may be ``None``).
        new_root: Root of the tree being checked (may be ``None``).
        label: Value stored in the ``set`` column and used as the print prefix.

    Returns:
        A DataFrame that always has the full set of report columns, even when
        no image was found. Notable columns:

        * ``exists_ref`` / ``exists_new``: whether the path was found in each tree.
        * ``same_shape``: decoded arrays have identical shapes.
        * ``equal_bytes``: decoded pixel arrays are identical (this is pixel
          equality via ``np.array_equal``, not equality of the files' bytes;
          the file hashes are in ``ref_md5`` / ``new_md5``).
        * ``L_inf`` / ``L1_mean`` and the ``*_if_swapped`` variants: pixel
          error as is, and after reversing the channel order of the new image.

        Pixel metrics and MD5 hashes are only filled in when both images exist
        and have the same shape.

    Raises:
        FileNotFoundError: propagated from ``read_png_bgr`` when a listed image
            cannot be decoded.
    """
    # Fixed schema: guarantees the columns exist even when there are zero rows,
    # so downstream column selections do not fail on empty trees.
    columns = [
        "set", "rel", "ref_path", "new_path", "exists_ref", "exists_new",
        "same_shape", "equal_bytes", "L_inf", "L1_mean",
        "equal_if_rgb_bgr_swapped", "L_inf_if_swapped", "L1_mean_if_swapped",
        "ref_md5", "new_md5",
    ]

    ref = list_images_ignore_ext(ref_root)
    new = list_images_ignore_ext(new_root)
    all_keys = sorted(set(ref) | set(new))

    rows = []
    missing_ref = 0
    missing_new = 0
    for rel in all_keys:
        ref_path = ref.get(rel)
        new_path = new.get(rel)
        rec = {
            "set": label,
            "rel": rel,
            "ref_path": ref_path or "",
            "new_path": new_path or "",
            "exists_ref": bool(ref_path),
            "exists_new": bool(new_path),
            "same_shape": False,
            "equal_bytes": False,
            "L_inf": np.nan,
            "L1_mean": np.nan,
            "equal_if_rgb_bgr_swapped": False,
            "L_inf_if_swapped": np.nan,
            "L1_mean_if_swapped": np.nan,
            "ref_md5": "",
            "new_md5": ""
        }
        if not ref_path:
            missing_ref += 1
        if not new_path:
            missing_new += 1

        if ref_path and new_path:
            a = read_png_bgr(ref_path)
            b = read_png_bgr(new_path)
            rec["same_shape"] = (a.shape == b.shape)
            if rec["same_shape"]:
                rec["ref_md5"] = md5sum(ref_path)
                rec["new_md5"] = md5sum(new_path)
                rec["equal_bytes"] = np.array_equal(a, b)
                m = compare_images(a, b)
                rec["L_inf"] = m["L_inf"]
                rec["L1_mean"] = m["L1_mean"]
                swapped_equal, m2 = maybe_channel_swap_equal(a, b)
                rec["equal_if_rgb_bgr_swapped"] = swapped_equal
                rec["L_inf_if_swapped"] = m2["L_inf"]
                rec["L1_mean_if_swapped"] = m2["L1_mean"]

        rows.append(rec)

    df = pd.DataFrame(rows, columns=columns)
    print(f"[{label}] total={len(all_keys)}  missing_ref={missing_ref}  missing_new={missing_new}")
    if len(df) > 0:
        # Rows that are not pixel-identical (includes rows missing on either side).
        mism = df[~df["equal_bytes"].astype(bool)]
        print(f"[{label}] exactly_equal={len(df)-len(mism)}/{len(df)}")
        if len(mism) > 0:
            print(mism.head(10)[["rel","same_shape","L_inf","L1_mean","equal_if_rgb_bgr_swapped","L_inf_if_swapped","L1_mean_if_swapped"]])
    return df

In [25]:
# ===============
# 3) Run the comparisons and export the report
# ===============
df_all = []

# Each pair is compared only when both of its roots are configured (non-empty).
if COMPUTE_DIRE_DIFF_ROOT and RECON_METRICS_DIFF_ROOT:
    df_all.append(compare_trees(COMPUTE_DIRE_DIFF_ROOT, RECON_METRICS_DIFF_ROOT, "Diff"))

if COMPUTE_DIRE_DIRE_ROOT and RECON_METRICS_DIRE_ROOT:
    df_all.append(compare_trees(COMPUTE_DIRE_DIRE_ROOT, RECON_METRICS_DIRE_ROOT, "Dire"))

if not df_all:
    print("Nothing to compare. Please set the *_ROOT paths correctly.")
else:
    # Merge the per-set reports into one frame and persist it as CSV.
    df = pd.concat(df_all, ignore_index=True)
    csv_path = os.path.join(REPORT_DIR, "png_compare_report.csv")
    df.to_csv(csv_path, index=False)
    print(f"[REPORT] CSV saved to: {csv_path}")
    # Show a compact view. A bare expression nested inside if/else is never
    # auto-displayed by the notebook, so the preview is printed explicitly.
    df_view = df.loc[:, ["set","rel","same_shape","equal_bytes","L_inf","L1_mean","equal_if_rgb_bgr_swapped"]]
    print(df_view.head(50))
    

[Diff] total=8580  missing_ref=0  missing_new=180
[Diff] exactly_equal=0/8580
                     rel  same_shape  L_inf   L1_mean  \
0  test/lsun_adm/adm/119        True   55.0  5.990346   
1  test/lsun_adm/adm/121        True   59.0  9.506683   
2  test/lsun_adm/adm/122        True   55.0  4.671504   
3  test/lsun_adm/adm/123        True   47.0  5.600901   
4  test/lsun_adm/adm/134        True   49.0  4.317413   
5  test/lsun_adm/adm/138        True   55.0  5.877619   
6  test/lsun_adm/adm/146        True   73.0  9.750783   
7  test/lsun_adm/adm/171        True   51.0  7.836563   
8  test/lsun_adm/adm/190        True   66.0  6.305237   
9  test/lsun_adm/adm/198        True   51.0  3.614232   

   equal_if_rgb_bgr_swapped  L_inf_if_swapped  L1_mean_if_swapped  
0                     False             114.0            9.275665  
1                     False              78.0           11.408498  
2                     False              64.0            6.112645  
3                     

In [26]:
# ===============================
# 4) Extra statistics -- list the paths missing from either tree
# ===============================

if df_all:
    # Rows whose image is absent from the reference tree / from the new tree.
    df_missing_ref = df.loc[df["exists_ref"].eq(False)]
    df_missing_new = df.loc[df["exists_new"].eq(False)]

    print(f"\n[Missing Summary]")
    print(f"缺失于 compute_dire (missing_ref): {len(df_missing_ref)} 张")
    print(f"缺失于 recon+metrics (missing_new): {len(df_missing_new)} 张")

    # Export each list to its own CSV inside the report directory.
    miss_ref_csv = os.path.join(REPORT_DIR, "missing_in_compute_dire.csv")
    df_missing_ref.to_csv(miss_ref_csv, index=False)
    miss_new_csv = os.path.join(REPORT_DIR, "missing_in_recon_metrics.csv")
    df_missing_new.to_csv(miss_new_csv, index=False)
    print(f"[REPORT] Missing ref list saved: {miss_ref_csv}")
    print(f"[REPORT] Missing new list saved: {miss_new_csv}")

    # Quick look at the first few entries of each list.
    print("\n前 10 个 compute_dire 缺失样本:")
    print(df_missing_ref.loc[:, ["set", "rel", "new_path"]].head(10))
    print("\n前 10 个 recon+metrics 缺失样本:")
    print(df_missing_new.loc[:, ["set", "rel", "ref_path"]].head(10))


[Missing Summary]
缺失于 compute_dire (missing_ref): 100 张
缺失于 recon+metrics (missing_new): 180 张
[REPORT] Missing ref list saved: /root/autodl-tmp/compare/1-3/fixed/missing_in_compute_dire.csv
[REPORT] Missing new list saved: /root/autodl-tmp/compare/1-3/fixed/missing_in_recon_metrics.csv

前 10 个 compute_dire 缺失样本:
       set                                                rel  \
8680  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8681  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8682  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8683  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8684  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8685  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8686  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8687  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8688  Dire  test/lsun_adm/ddpm/samples_ema1_step001164000....   
8689  Dire  test/lsun_adm/ddpm/sam