In [1]:
# ==========================================
# 三特徵融合（CLIP + ELA + PRNU）→ OOD 評估
# - 自動抓最新模型
# - 用同一份 splits_*.json 對齊樣本
# - Val 上訓練融合器（LogReg），測 Val/Test-IID/Test-OOD
# ==========================================
import os, json, glob, math, re, time
from pathlib import Path
import numpy as np
from tqdm import tqdm

import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report, roc_curve
import joblib

# ---------- Path configuration ----------
# Project root; model artefacts and split files are looked up under saved_models/.
SCRIPT_ROOT = "/home/yaya/ai-detect-proj/Script"
OUTPUT_DIR  = os.path.join(SCRIPT_ROOT, "saved_models")
# Candidate split files in priority order; the first one that exists on disk wins.
SPLITS_CAND = [
    os.path.join(OUTPUT_DIR, "splits_fewshot_iid_ood.json"),
    os.path.join(OUTPUT_DIR, "splits_clip_feature_iid_ood.json"),
]
SPLITS_JSON = next((p for p in SPLITS_CAND if os.path.isfile(p)), None)
assert SPLITS_JSON, "找不到 splits JSON，請先產生（fewshot 或 clip_feature）"
print("Using splits:", SPLITS_JSON)

# Feature directories (one real/fake pair per feature type)
CLIP_REAL_DIR = os.path.join(SCRIPT_ROOT, "features_npy", "clip_real_npy")
CLIP_FAKE_DIR = os.path.join(SCRIPT_ROOT, "features_npy", "clip_fake_npy")
ELA_REAL_DIR  = os.path.join(SCRIPT_ROOT, "features_npy", "ela_real_npy")
ELA_FAKE_DIR  = os.path.join(SCRIPT_ROOT, "features_npy", "ela_fake_npy")
PRNU_REAL_I8  = os.path.join(SCRIPT_ROOT, "features_quant", "prnu_real_i8")
PRNU_FAKE_I8  = os.path.join(SCRIPT_ROOT, "features_quant", "prnu_fake_i8")

# Run the CNNs on GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device =", device)

# ---------- Load splits ----------
# Only the top-level "splits" entry of the JSON is kept; later code looks up
# the "val", "test_iid" and "test_ood" keys in it.
with open(SPLITS_JSON, "r", encoding="utf-8") as f:
    SPLITS = json.load(f)["splits"]

def is_real_clip(p: str) -> bool:
    """Return True when the path has a ``clip_real_npy`` directory component.

    The check is a substring test on the POSIX form of the path, so the
    directory name must be both preceded and followed by a slash.
    """
    posix_form = Path(p).as_posix()
    return posix_form.find("/clip_real_npy/") != -1

# 建 ELA/PRNU 檔名索引（用 basename 或 stem 對應）
def index_dir(d):
    """Build a lookup from lower-cased file name / stem to full path.

    Scans ``d`` (non-recursively) for ``*.npy`` first and ``*.npz`` second.
    Each file is registered under both its lower-cased name and its
    lower-cased stem; the first registration of a key wins.
    """
    lookup = {}
    root = Path(d)
    for pattern in ("*.npy", "*.npz"):
        for entry in root.glob(pattern):
            full_path = str(entry)
            for key in (entry.name.lower(), entry.stem.lower()):
                lookup.setdefault(key, full_path)
    return lookup

# Name/stem -> path lookup tables, one per (feature, class) directory.
# They are consulted by map_to() to find the ELA / PRNU file that matches a CLIP path.
IDX_ELA_REAL = index_dir(ELA_REAL_DIR)
IDX_ELA_FAKE = index_dir(ELA_FAKE_DIR)
IDX_PR_REAL  = index_dir(PRNU_REAL_I8)
IDX_PR_FAKE  = index_dir(PRNU_FAKE_I8)

def map_to(dir_idx_real, dir_idx_fake, p: str):
    """Map a CLIP split path to the matching file of another feature (ELA/PRNU).

    Args:
        dir_idx_real: lookup table (as built by ``index_dir``) for the real class.
        dir_idx_fake: lookup table for the fake class.
        p: CLIP feature path taken from the split file.

    Returns:
        ``(path_or_None, label)`` where label is 0 for real and 1 for fake.
        The lower-cased file name is tried first, then the lower-cased stem.
    """
    label, table = 1, dir_idx_fake
    if is_real_clip(p):
        label, table = 0, dir_idx_real
    source = Path(p)
    match = table.get(source.name.lower())
    if not match:
        match = table.get(source.stem.lower())
    return match, label

def get_split_paths(name: str):
    """Resolve the CLIP / ELA / PRNU feature files of one split, row-aligned.

    A sample is kept only when BOTH its ELA and its PRNU file are found, so
    index ``i`` refers to the same image in all three path lists and in all
    three label arrays. Previously a sample missing one feature was skipped in
    that feature's list only, which shifted the later rows out of alignment
    with the CLIP list and its labels.

    Args:
        name: key into ``SPLITS`` (e.g. ``"val"``). An unknown key gives empty results.

    Returns:
        ``(clip_paths, clip_y, ela_paths, ela_y, pr_paths, pr_y)``. Labels are
        int arrays with 0 = real and 1 = fake; the three label arrays are equal.
    """
    clip_paths, ela_paths, pr_paths, labels = [], [], [], []
    total = miss_ela = miss_pr = 0
    for p in SPLITS.get(name, []):
        total += 1
        q_ela, y = map_to(IDX_ELA_REAL, IDX_ELA_FAKE, p)
        q_pr, _ = map_to(IDX_PR_REAL, IDX_PR_FAKE, p)
        if q_ela is None:
            miss_ela += 1
        if q_pr is None:
            miss_pr += 1
        if q_ela is None or q_pr is None:
            continue  # incomplete sample: drop it everywhere to keep rows aligned
        clip_paths.append(p)
        ela_paths.append(q_ela)
        pr_paths.append(q_pr)
        labels.append(y)
    # Same summary line as before: per-feature hit counts and miss counts.
    print(f"[{name}] CLIP={total} | ELA={total - miss_ela}(miss {miss_ela}) | PRNU={total - miss_pr}(miss {miss_pr})")
    dropped = total - len(clip_paths)
    if dropped:
        print(f"[{name}] dropped {dropped} sample(s) lacking ELA or PRNU; kept {len(clip_paths)} aligned")
    y_arr = np.array(labels, int)
    return (clip_paths, y_arr,
            ela_paths, y_arr.copy(),
            pr_paths,  y_arr.copy())

# Resolve feature paths and labels for the three splits used below.
(val_clip, yv_clip, val_ela, yv_ela, val_pr, yv_pr) = get_split_paths("val")
(ti_clip, yti_clip, ti_ela, yti_ela, ti_pr, yti_pr) = get_split_paths("test_iid")
(to_clip, yto_clip, to_ela, yto_ela, to_pr, yto_pr) = get_split_paths("test_ood")

# The fuser is trained on Val, so Val must be non-empty and its three label
# arrays must have the same length.
# NOTE(review): only Val is checked here; the test splits are not.
assert len(val_clip)>0 and len(val_ela)>0 and len(val_pr)>0, "Val split 對不到（請檢查命名/目錄）"
assert (len(yv_clip)==len(yv_ela)==len(yv_pr)), "Val 標籤長度不一致，請檢查索引"

# ---------- CLIP：載入最新 LinearSVC，Val 上做 sigmoid 校準 ----------
def latest_file(patterns):
    """Return the most recently modified file in ``OUTPUT_DIR`` matching the pattern(s).

    Args:
        patterns: one glob pattern, or a list/tuple of patterns; all matches are pooled.

    Raises:
        AssertionError: when no file matches any pattern.
    """
    pattern_list = patterns if isinstance(patterns,(list,tuple)) else [patterns]
    cand = [
        hit
        for pat in pattern_list
        for hit in glob.glob(os.path.join(OUTPUT_DIR, pat))
    ]
    assert cand, f"找不到模型：{patterns}"
    # Oldest first, so the last element has the newest modification time.
    return sorted(cand, key=os.path.getmtime)[-1]

# Pick the newest CLIP linear-SVM artefact and load it.
# NOTE(review): joblib.load unpickles the file — only load artefacts you produced yourself.
CLIP_SVM_PATH = latest_file("clip_linear_svm_feature_*.joblib")
print("CLIP SVM:", CLIP_SVM_PATH)
svc = joblib.load(CLIP_SVM_PATH)

def load_clip_vec(p):
    """Load one CLIP feature file as a flat, L2-normalised float32 vector.

    A small epsilon is added to the norm so an all-zero vector does not
    divide by zero.
    """
    raw = np.load(p, allow_pickle=True)
    flat = np.asarray(raw).astype(np.float32).reshape(-1)
    return flat / (np.linalg.norm(flat) + 1e-12)

def load_clip_matrix(paths):
    """Load every CLIP vector in ``paths`` and stack them into one matrix (one row per path)."""
    rows = []
    for p in tqdm(paths, desc="CLIP load"):
        rows.append(load_clip_vec(p))
    return np.stack(rows, axis=0)

# Calibration (fitted on Val): wrap the already-trained SVM so it outputs probabilities.
Xv_clip = load_clip_matrix(val_clip)
cal_clip = CalibratedClassifierCV(svc, method="sigmoid", cv="prefit")
cal_clip.fit(Xv_clip, yv_clip)  # learns the calibration parameters only

# ---------- ELA：模型與讀檔 ----------
def load_ela_array(path):
    """Load an ELA map from ``.npy`` / ``.npz`` as a float32 H x W x 3 array.

    Layout handling:
      * 2-D input is treated as a single channel and repeated to 3 channels.
      * Channel-first input (first axis 1 or 3, last axis not 1 or 3) is
        transposed to channel-last.
      * Any single-channel result is repeated to 3 channels. This now also
        covers a ``(1, H, W)`` input, which the old ``elif`` chain transposed
        but then left with one channel.
    Values whose maximum exceeds 1.5 are assumed to be on a 0-255 scale and
    are divided by 255.

    Args:
        path: ``.npy`` file, or ``.npz`` archive holding the map under one of
            the keys ``'ela'``, ``'arr'`` or ``'arr_0'`` (tried in that order).

    Returns:
        A new float32 array.

    Raises:
        KeyError: the ``.npz`` archive contains none of the expected keys.
    """
    z = np.load(path, mmap_mode='r')
    if isinstance(z, np.lib.npyio.NpzFile):
        # Materialise the array, then close the archive (it was leaked before).
        with z:
            for key in ('ela', 'arr', 'arr_0'):
                if key in z.files:
                    a = np.asarray(z[key])
                    break
            else:
                raise KeyError(f"{path}: none of 'ela'/'arr'/'arr_0' found (keys: {z.files})")
    else:
        a = np.asarray(z)
    if a.ndim == 3 and a.shape[0] in (1,3) and a.shape[-1] not in (1,3):
        a = np.transpose(a, (1,2,0))          # CHW -> HWC
    if a.ndim == 2:
        a = a[..., None]                      # HW -> HW1
    if a.ndim == 3 and a.shape[-1] == 1:
        a = np.repeat(a, 3, axis=2)           # HW1 -> HW3
    a = a.astype(np.float32)                  # always a fresh, writable copy
    if a.max() > 1.5: a *= (1/255.0)
    return a

def zscore3(x):
    """Standardise each channel of an H x W x C image to zero mean and unit std.

    Channels whose std is below 1e-6 are only mean-centred (their divisor is set to 1).
    """
    mean = x.mean(axis=(0,1), keepdims=True)
    std = x.std(axis=(0,1), keepdims=True)
    np.copyto(std, 1.0, where=std < 1e-6)   # avoid dividing by ~0 on flat channels
    return (x - mean) / std

def crop_hw3(img, size=256, center=True):
    """Return a ``size`` x ``size`` crop (copy) of an H x W x C image.

    Images smaller than ``size`` along either axis are first edge-padded,
    split as evenly as possible between both sides. The crop is centred when
    ``center`` is true, otherwise taken from the top-left corner.
    """
    rows, cols, _ = img.shape
    need_r = max(0, size - rows)
    need_c = max(0, size - cols)
    if need_r or need_c:
        pad_spec = (
            (need_r // 2, need_r - need_r // 2),
            (need_c // 2, need_c - need_c // 2),
            (0, 0),
        )
        img = np.pad(img, pad_spec, mode='edge')
        rows, cols, _ = img.shape
    top = (rows - size) // 2 if center else 0
    left = (cols - size) // 2 if center else 0
    return img[top:top+size, left:left+size, :].copy()

class ELAForensicCNN(nn.Module):
    """Small CNN producing one logit per ELA image.

    Four stages of two 3x3 conv blocks each: the first two stages use
    BatchNorm, the last two GroupNorm. Stages are separated by 2x average
    pooling, followed by global average pooling and a linear head. The module
    layout (``net.<i>.<j>`` / ``fc``) is what saved checkpoints are keyed on.
    """

    def __init__(self,in_ch=3):
        super().__init__()

        def conv3x3(c_in, c_out):
            return nn.Conv2d(c_in, c_out, 3, padding=1, bias=False)

        def bnblk(c_in, c_out):
            # conv -> BatchNorm -> ReLU
            return nn.Sequential(conv3x3(c_in, c_out), nn.BatchNorm2d(c_out), nn.ReLU(True))

        def gnblk(c_in, c_out, g=8):
            # conv -> GroupNorm -> ReLU
            conv = conv3x3(c_in, c_out)
            norm = nn.GroupNorm(num_groups=min(g, c_out), num_channels=c_out)
            return nn.Sequential(conv, norm, nn.ReLU(True))

        stages = [
            bnblk(in_ch, 32), bnblk(32, 32), nn.AvgPool2d(2),
            bnblk(32, 64), bnblk(64, 64), nn.AvgPool2d(2),
            gnblk(64, 128), gnblk(128, 128), nn.AvgPool2d(2),
            gnblk(128, 256), gnblk(256, 256), nn.AdaptiveAvgPool2d(1),
        ]
        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(256,1)

    def forward(self,x):
        pooled = self.net(x).flatten(1)
        return self.fc(pooled).squeeze(1)

# Pick the newest ELA checkpoint and load it into an eval-mode model on `device`.
# strict=True makes the load fail if the checkpoint keys do not match the model exactly.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
ELA_MODEL_PATH = latest_file("ela_fromnpy_cnn_best_*.pt")
print("ELA model:", ELA_MODEL_PATH)
ela_model = ELAForensicCNN().to(device).eval()
ela_model.load_state_dict(torch.load(ELA_MODEL_PATH, map_location=device), strict=True)

@torch.no_grad()
def ela_probs(paths, batch=128, size=256):
    """Run the ELA CNN over ``paths`` and return one sigmoid probability per file.

    Each file is loaded, centre-cropped to ``size``, z-scored per channel and
    moved to channel-first layout. Files are processed ``batch`` at a time.
    """
    probs = []
    for start in tqdm(range(0, len(paths), batch), desc="ELA infer"):
        planes = []
        for p in paths[start:start+batch]:
            img = zscore3(crop_hw3(load_ela_array(p), size=size, center=True))
            planes.append(np.transpose(img, (2,0,1)))   # HWC -> CHW
        tensor = torch.from_numpy(np.stack(planes, 0)).to(device)
        logit = ela_model(tensor.contiguous(memory_format=torch.channels_last))
        batch_probs = torch.sigmoid(logit).float().cpu().numpy().tolist()
        probs.extend(batch_probs)
    return np.array(probs)

# ---------- PRNU：模型與讀檔（int8） ----------
def load_i8_2d(path):
    """Load a stored residual map as a 2-D float32 array.

    A 3-D array with a leading or trailing axis of length 1 is squeezed first.

    Raises:
        AssertionError: the array is not 2-D after squeezing.
    """
    arr = np.asarray(np.load(path, mmap_mode='r'))
    if arr.ndim == 3 and 1 in (arr.shape[0], arr.shape[-1]):
        arr = arr.squeeze()
    assert arr.ndim==2, f"{path} got {arr.shape}"
    return arr.astype(np.float32)

def per_image_norm(x):
    """Standardise one image with its own mean and std.

    When the std is non-finite or below 1e-6, the fixed values mean 0 and
    std 20 are used instead.
    """
    mean, std = x.mean(), x.std()
    degenerate = (not np.isfinite(std)) or std < 1e-6
    if degenerate:
        mean, std = 0.0, 20.0
    return (x - mean) / std

def avg_pool_2x(x):
    """Downsample a 2-D array by averaging non-overlapping 2x2 blocks.

    An odd trailing row or column is discarded before pooling.
    """
    rows, cols = x.shape
    even_r = rows - rows % 2
    even_c = cols - cols % 2
    blocks = x[:even_r, :even_c].reshape(even_r // 2, 2, even_c // 2, 2)
    return blocks.mean(axis=(1, 3))

def crop_2d(img, size=256, center=True):
    """Return a ``size`` x ``size`` crop (copy) of a 2-D array.

    Arrays smaller than ``size`` along either axis are first edge-padded,
    split as evenly as possible between both sides. The crop is centred when
    ``center`` is true, otherwise taken from the top-left corner.
    """
    rows, cols = img.shape
    need_r = max(0, size - rows)
    need_c = max(0, size - cols)
    if need_r or need_c:
        pad_spec = (
            (need_r // 2, need_r - need_r // 2),
            (need_c // 2, need_c - need_c // 2),
        )
        img = np.pad(img, pad_spec, mode='edge')
        rows, cols = img.shape
    y0 = (rows - size) // 2 if center else 0
    x0 = (cols - size) // 2 if center else 0
    return img[y0:y0+size, x0:x0+size].copy()

class SmallForensicCNN(nn.Module):
    """Small GroupNorm CNN producing one logit per single-channel residual map.

    Three stages of two 3x3 conv blocks each, with 2x average pooling after
    the first two stages, then global average pooling and a linear head. The
    module layout (``net.<i>.<j>`` / ``fc``) is what saved checkpoints are keyed on.
    """

    def __init__(self,in_ch=1):
        super().__init__()

        def blk(c_in, c_out, g=8):
            # conv -> GroupNorm -> ReLU
            conv = nn.Conv2d(c_in, c_out, 3, padding=1, bias=False)
            norm = nn.GroupNorm(num_groups=min(g, c_out), num_channels=c_out)
            return nn.Sequential(conv, norm, nn.ReLU(True))

        stages = [
            blk(in_ch, 32), blk(32, 32), nn.AvgPool2d(2),
            blk(32, 64), blk(64, 64), nn.AvgPool2d(2),
            blk(64, 128), blk(128, 128), nn.AdaptiveAvgPool2d(1),
        ]
        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(128,1)

    def forward(self,x):
        pooled = self.net(x).flatten(1)
        return self.fc(pooled).squeeze(1)

# Pick the newest PRNU checkpoint (either naming scheme) and load it into an
# eval-mode single-channel model on `device`.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
PRNU_MODEL_PATH = latest_file(["prnu_cnn_i8_best_*.pt","prnu_fromnpy_cnn_best_*.pt"])
print("PRNU model:", PRNU_MODEL_PATH)
pr_model = SmallForensicCNN(1).to(device).eval()
pr_model.load_state_dict(torch.load(PRNU_MODEL_PATH, map_location=device), strict=True)

@torch.no_grad()
def prnu_probs(paths, batch=128, size=256, downsample=True):
    """Run the PRNU CNN over ``paths`` and return one sigmoid probability per file.

    Each map is loaded as float32 and, when ``downsample`` is set and both
    sides are at least 512, 2x average-pooled. It is then centre-cropped to
    ``size`` and normalised per image. Files are processed ``batch`` at a time.
    """
    probs = []
    for start in tqdm(range(0, len(paths), batch), desc="PRNU infer"):
        planes = []
        for p in paths[start:start+batch]:
            residual = load_i8_2d(p)                 # stored map -> float32
            large = residual.shape[0]>=512 and residual.shape[1]>=512
            if downsample and large:
                residual = avg_pool_2x(residual)
            residual = per_image_norm(crop_2d(residual, size=size, center=True))
            planes.append(residual[None,...])        # add the channel axis
        tensor = torch.from_numpy(np.stack(planes, 0)).to(device)
        logit = pr_model(tensor.contiguous(memory_format=torch.channels_last))
        batch_probs = torch.sigmoid(logit).float().cpu().numpy().tolist()
        probs.extend(batch_probs)
    return np.array(probs)

# ---------- Per-branch probabilities (CLIP / ELA / PRNU) ----------
# Val
pv_clip = cal_clip.predict_proba(Xv_clip)[:,1]
pv_ela  = ela_probs(val_ela)
pv_pr   = prnu_probs(val_pr)
# The three probability vectors are stacked column-wise later, so they must have equal length.
assert len(pv_clip)==len(pv_ela)==len(pv_pr)==len(yv_clip)

# Test-IID (an empty split yields empty arrays instead of calling the models)
Xi_clip = load_clip_matrix(ti_clip) if len(ti_clip) else np.zeros((0, Xv_clip.shape[1]), np.float32)
pi_clip = cal_clip.predict_proba(Xi_clip)[:,1] if len(ti_clip) else np.array([])
pi_ela  = ela_probs(ti_ela) if len(ti_ela) else np.array([])
pi_pr   = prnu_probs(ti_pr) if len(ti_pr) else np.array([])

# Test-OOD (same handling as Test-IID)
Xo_clip = load_clip_matrix(to_clip) if len(to_clip) else np.zeros((0, Xv_clip.shape[1]), np.float32)
po_clip = cal_clip.predict_proba(Xo_clip)[:,1] if len(to_clip) else np.array([])
po_ela  = ela_probs(to_ela) if len(to_ela) else np.array([])
po_pr   = prnu_probs(to_pr) if len(to_pr) else np.array([])

# ---------- 融合器（在 Val 上學） ----------
def stack_feats(p1,p2,p3):
    """Stack three equally-shaped score arrays as columns of one float32 matrix."""
    columns = (p1, p2, p3)
    fused = np.stack(columns, axis=1)
    return fused.astype(np.float32)
# Fusion inputs: one row per sample, columns = (CLIP, ELA, PRNU) probabilities.
# Empty test splits are represented as None.
Xv = stack_feats(pv_clip, pv_ela, pv_pr); yv = yv_clip
Xi = stack_feats(pi_clip, pi_ela, pi_pr) if len(pi_clip) else None
Xo = stack_feats(po_clip, po_ela, po_pr) if len(po_clip) else None

# Logistic-regression fuser fitted on Val only; class_weight="balanced" re-weights the classes.
fuser = LogisticRegression(max_iter=1000, class_weight="balanced")
fuser.fit(Xv, yv)
print("Fusion weights:", fuser.coef_, "bias:", fuser.intercept_)

def eval_block(name, X, y):
    """Score ``X`` with the fuser and return ``(probabilities, auc)``.

    Prints a notice and returns ``None`` when ``X`` is ``None`` or empty.
    """
    is_empty = X is None or len(X)==0
    if is_empty:
        print(f"[{name}] (empty)")
        return None
    scores = fuser.predict_proba(X)[:,1]
    return scores, roc_auc_score(y, scores)

# Derive two decision thresholds from the Val ROC curve:
#   thr_youden - the threshold maximising TPR - FPR (Youden's J)
#   thr_fpr05  - the threshold at the last ROC point whose FPR is still <= 5%
p_val_fused, auc_val = eval_block("Val", Xv, yv)
fpr, tpr, thr = roc_curve(yv, p_val_fused)
thr_youden = float(thr[(tpr - fpr).argmax()])
idx = np.where(fpr <= 0.05)[0]
thr_fpr05 = float(thr[idx[-1]]) if len(idx) else float(thr[0])
print(f"[Val Fused] AUC={auc_val:.4f} | thr_youden={thr_youden:.3f} | thr_fpr@5%={thr_fpr05:.3f}")

def report(name, X, y, thr):
    """Print accuracy, AUC, confusion matrix and classification report for one split.

    Args:
        name: label printed in the header line.
        X: fused feature matrix for the split, or ``None`` / empty to skip.
        y: ground-truth labels (0 = real, 1 = fake).
        thr: probability threshold; scores >= ``thr`` are predicted as class 1.
    """
    if X is None or len(X)==0:
        print(f"[{name}] (empty)"); return
    p = fuser.predict_proba(X)[:,1]
    pred = (p >= thr).astype(int)
    acc = accuracy_score(y, pred)
    # AUC is undefined when y holds a single class; scikit-learn signals that
    # with ValueError. Catch only that, so KeyboardInterrupt and real bugs are
    # no longer swallowed by a bare except.
    try:
        auc = roc_auc_score(y, p)
    except ValueError:
        auc = float("nan")
    print(f"\n[{name}] acc@thr={acc:.4f} | auc={auc:.4f} | thr={thr:.3f}")
    print(confusion_matrix(y, pred))
    print(classification_report(y, pred, digits=4))

# Reports at both Val-derived thresholds (Youden and FPR@5%)
report("Val (Youden)", Xv, yv, thr_youden)
if Xi is not None: report("Test-IID (Youden)", Xi, yti_clip, thr_youden)
if Xo is not None: report("Test-OOD (Youden)", Xo, yto_clip, thr_youden)

report("Val (FPR@5%)", Xv, yv, thr_fpr05)
if Xi is not None: report("Test-IID (FPR@5%)", Xi, yti_clip, thr_fpr05)
if Xo is not None: report("Test-OOD (FPR@5%)", Xo, yto_clip, thr_fpr05)

# ---------- OOD 分來源（用 FPR@5% 門檻） ----------
def tag_from_path(p: str, y: int):
    """Guess a source tag for a sample from its path.

    Real samples (``y == 0``) get ``"unsplash"``, ``"imagenet"`` or ``"real"``.
    Other samples get the first known generator keyword found anywhere in the
    lower-cased path. If none is found, the tag is the part of the file stem
    before the first ``_`` / ``-``, or ``"fake"`` when that part is empty.
    """
    lowered = Path(p).as_posix().lower()
    if y==0:
        for source in ("unsplash", "imagenet"):
            if source in lowered:
                return source
        return "real"
    generators = ("dalle3","midjourney","sd3","flux","stable","sdxl","playground","kolors")
    hit = next((k for k in generators if k in lowered), None)
    if hit is not None:
        return hit
    # Fall back to the leading segment of the file name.
    head = Path(p).stem.lower().split("_")[0].split("-")[0]
    return head if head else "fake"

def ood_breakdown(thr):
    """Print per-source accuracy, AUC and confusion matrix on the OOD test split.

    Samples are grouped by ``tag_from_path`` applied to the original CLIP
    split path and label. Groups are listed from largest to smallest.

    Args:
        thr: probability threshold; scores >= ``thr`` are predicted as class 1.
    """
    if Xo is None:
        print("\n[OOD breakdown] (empty)"); return
    p = fuser.predict_proba(Xo)[:,1]; y = yto_clip
    # Group (label, score) pairs by the tag derived from the CLIP path.
    buckets = {}
    for path, yy, pp in zip(to_clip, y, p):
        buckets.setdefault(tag_from_path(path, yy), []).append((yy, pp))
    print("\n== OOD per-dataset ==")
    for tag, lst in sorted(buckets.items(), key=lambda kv: -len(kv[1])):
        arr_y = np.array([a for a,_ in lst]); arr_p = np.array([b for _,b in lst])
        # AUC is undefined for a single-class bucket (ValueError); report NaN
        # for that case only instead of swallowing every exception.
        try:
            auc = roc_auc_score(arr_y, arr_p)
        except ValueError:
            auc = float("nan")
        pred = (arr_p >= thr).astype(int)
        acc = (pred==arr_y).mean()
        # Fix the label set so the matrix is always 2x2 (rows = true 0/1,
        # columns = predicted 0/1), even when a bucket holds one class only.
        cm  = confusion_matrix(arr_y, pred, labels=[0, 1])
        print(f"- {tag:10s} n={len(arr_y):5d} | acc={acc:.4f} | auc={auc:.4f}\n{cm}\n")

# Per-source OOD breakdown at the Val-derived FPR@5% threshold.
print("\n[OOD breakdown @ FPR@5%]")
ood_breakdown(thr_fpr05)


Using splits: /home/yaya/ai-detect-proj/Script/saved_models/splits_clip_feature_iid_ood.json
device = cuda
[val] CLIP=18000 | ELA=18000(miss 0) | PRNU=18000(miss 0)
[test_iid] CLIP=18000 | ELA=18000(miss 0) | PRNU=18000(miss 0)
[test_ood] CLIP=43989 | ELA=43989(miss 0) | PRNU=43989(miss 0)
CLIP SVM: /home/yaya/ai-detect-proj/Script/saved_models/clip_linear_svm_feature_20250815_170428.joblib


CLIP load: 100%|██████████| 18000/18000 [00:09<00:00, 1812.67it/s]


ELA model: /home/yaya/ai-detect-proj/Script/saved_models/ela_fromnpy_cnn_best_20250815_172022.pt
PRNU model: /home/yaya/ai-detect-proj/Script/saved_models/prnu_cnn_i8_best_20250815_183635.pt


ELA infer: 100%|██████████| 141/141 [01:52<00:00,  1.25it/s]
PRNU infer: 100%|██████████| 141/141 [00:41<00:00,  3.40it/s]
CLIP load: 100%|██████████| 18000/18000 [00:05<00:00, 3052.24it/s]
ELA infer: 100%|██████████| 141/141 [02:46<00:00,  1.18s/it]
PRNU infer: 100%|██████████| 141/141 [00:42<00:00,  3.35it/s]
CLIP load: 100%|██████████| 43989/43989 [00:16<00:00, 2676.14it/s]
ELA infer: 100%|██████████| 344/344 [07:10<00:00,  1.25s/it]
PRNU infer: 100%|██████████| 344/344 [01:35<00:00,  3.62it/s]


Fusion weights: [[7.38651767 2.42389983 0.25492934]] bias: [-4.71015858]
[Val Fused] AUC=0.9892 | thr_youden=0.733 | thr_fpr@5%=0.101

[Val (Youden)] acc@thr=0.9779 | auc=0.9892 | thr=0.733
[[ 5958    42]
 [  356 11644]]
              precision    recall  f1-score   support

           0     0.9436    0.9930    0.9677      6000
           1     0.9964    0.9703    0.9832     12000

    accuracy                         0.9779     18000
   macro avg     0.9700    0.9817    0.9754     18000
weighted avg     0.9788    0.9779    0.9780     18000


[Test-IID (Youden)] acc@thr=0.9793 | auc=0.9904 | thr=0.733
[[ 5961    39]
 [  334 11666]]
              precision    recall  f1-score   support

           0     0.9469    0.9935    0.9697      6000
           1     0.9967    0.9722    0.9843     12000

    accuracy                         0.9793     18000
   macro avg     0.9718    0.9828    0.9770     18000
weighted avg     0.9801    0.9793    0.9794     18000


[Test-OOD (Youden)] acc@thr=0.48



In [2]:
# Train a fuser on Val using only ELA + PRNU (no CLIP), then evaluate it.
# Columns of the *_2 matrices are (ELA, PRNU) probabilities; empty splits become None.
Xv_2 = np.stack([pv_ela, pv_pr], axis=1).astype(np.float32); yv_2 = yv
Xi_2 = np.stack([pi_ela, pi_pr], axis=1).astype(np.float32) if len(pi_ela) else None
Xo_2 = np.stack([po_ela, po_pr], axis=1).astype(np.float32) if len(po_ela) else None

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score, confusion_matrix, classification_report

fuser_2 = LogisticRegression(max_iter=1000, class_weight="balanced", C=0.5)
fuser_2.fit(Xv_2, yv_2)

# Thresholds from the Val ROC: Youden's J and the last ROC point with FPR <= 5%.
p_val_2 = fuser_2.predict_proba(Xv_2)[:,1]
fpr,tpr,thr = roc_curve(yv_2, p_val_2)
thr_y_2 = float(thr[(tpr-fpr).argmax()])
idx = np.where(fpr<=0.05)[0]; thr_f05_2 = float(thr[idx[-1]]) if len(idx) else float(thr[0])

def report(name, X, y, thr):
    """Print accuracy, AUC, confusion matrix and classification report using ``fuser_2``.

    NOTE(review): this rebinds the name ``report`` used by the three-feature
    cell; after running this cell, ``report`` scores with ``fuser_2``.
    """
    if X is None:
        print(f"[{name}] empty")
        return
    scores = fuser_2.predict_proba(X)[:,1]
    pred = (scores>=thr).astype(int)
    acc = (pred==y).mean()
    auc = roc_auc_score(y,scores)
    print(f"\n[{name}] acc={acc:.4f} | auc={auc:.4f} | thr={thr:.3f}")
    print(confusion_matrix(y, pred))
    print(classification_report(y, pred, digits=4))

# ELA+PRNU reports at the Youden threshold ...
report("Val (Youden, ELA+PRNU)", Xv_2, yv_2, thr_y_2)
if Xi_2 is not None: report("Test-IID (Youden, ELA+PRNU)", Xi_2, yti_clip, thr_y_2)
if Xo_2 is not None: report("Test-OOD (Youden, ELA+PRNU)", Xo_2, yto_clip, thr_y_2)

# ... and at the FPR@5% threshold.
report("Val (FPR@5%, ELA+PRNU)", Xv_2, yv_2, thr_f05_2)
if Xi_2 is not None: report("Test-IID (FPR@5%, ELA+PRNU)", Xi_2, yti_clip, thr_f05_2)
if Xo_2 is not None: report("Test-OOD (FPR@5%, ELA+PRNU)", Xo_2, yto_clip, thr_f05_2)



[Val (Youden, ELA+PRNU)] acc=0.9353 | auc=0.9729 | thr=0.467
[[ 5708   292]
 [  873 11127]]
              precision    recall  f1-score   support

           0     0.8673    0.9513    0.9074      6000
           1     0.9744    0.9273    0.9503     12000

    accuracy                         0.9353     18000
   macro avg     0.9209    0.9393    0.9288     18000
weighted avg     0.9387    0.9353    0.9360     18000


[Test-IID (Youden, ELA+PRNU)] acc=0.9382 | auc=0.9753 | thr=0.467
[[ 5714   286]
 [  827 11173]]
              precision    recall  f1-score   support

           0     0.8736    0.9523    0.9113      6000
           1     0.9750    0.9311    0.9526     12000

    accuracy                         0.9382     18000
   macro avg     0.9243    0.9417    0.9319     18000
weighted avg     0.9412    0.9382    0.9388     18000


[Test-OOD (Youden, ELA+PRNU)] acc=0.7845 | auc=0.8802 | thr=0.467
[[19791  5198]
 [ 4283 14717]]
              precision    recall  f1-score   support

  

In [3]:
# Up-weight the real class (label 0) on Val; weights between 2 and 5 are worth trying.
w = np.where(yv==0, 3.0, 1.0).astype(np.float32)

from sklearn.linear_model import LogisticRegression
fuser_cs = LogisticRegression(max_iter=1000, C=0.5)  # a smaller C would be more conservative
fuser_cs.fit(np.stack([pv_clip,pv_ela,pv_pr],1), yv, sample_weight=w)

# Thresholds are again taken from the Val ROC curve.
from sklearn.metrics import roc_curve
p_val_cs = fuser_cs.predict_proba(np.stack([pv_clip,pv_ela,pv_pr],1))[:,1]
fpr,tpr,thr = roc_curve(yv, p_val_cs)
thr_y_cs = float(thr[(tpr-fpr).argmax()])
idx = np.where(fpr<=0.05)[0]; thr_f05_cs = float(thr[idx[-1]]) if len(idx) else float(thr[0])

def report_cs(name, X, y, thr):
    """Print accuracy, AUC, confusion matrix and classification report using ``fuser_cs``."""
    if X is None:
        print(f"[{name}] empty")
        return
    scores = fuser_cs.predict_proba(X)[:,1]
    pred = (scores>=thr).astype(int)
    from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report
    acc = (pred==y).mean()
    auc = roc_auc_score(y,scores)
    print(f"\n[{name}] acc={acc:.4f} | auc={auc:.4f} | thr={thr:.3f}")
    print(confusion_matrix(y, pred))
    print(classification_report(y, pred, digits=4))

# Rebuild the three-column (CLIP, ELA, PRNU) test matrices; empty splits become None.
Xi = np.stack([pi_clip,pi_ela,pi_pr],1) if len(pi_clip) else None
Xo = np.stack([po_clip,po_ela,po_pr],1) if len(po_clip) else None
# Only the OOD split is reported for the cost-sensitive fuser.
report_cs("Test-OOD (Youden, cost-sensitive)", Xo, yto_clip, thr_y_cs)
report_cs("Test-OOD (FPR@5%, cost-sensitive)", Xo, yto_clip, thr_f05_cs)



[Test-OOD (Youden, cost-sensitive)] acc=0.4814 | auc=0.8844 | thr=0.639
[[ 2198 22791]
 [   21 18979]]
              precision    recall  f1-score   support

           0     0.9905    0.0880    0.1616     24989
           1     0.4544    0.9989    0.6246     19000

    accuracy                         0.4814     43989
   macro avg     0.7225    0.5434    0.3931     43989
weighted avg     0.7590    0.4814    0.3616     43989


[Test-OOD (FPR@5%, cost-sensitive)] acc=0.4517 | auc=0.8844 | thr=0.072
[[  871 24118]
 [    0 19000]]
              precision    recall  f1-score   support

           0     1.0000    0.0349    0.0674     24989
           1     0.4407    1.0000    0.6117     19000

    accuracy                         0.4517     43989
   macro avg     0.7203    0.5174    0.3396     43989
weighted avg     0.7584    0.4517    0.3025     43989



In [2]:
# ======================================================
# Rename DIV2K npy → e.g. "DIV2K__0001.npy" (zero-pad width is auto-selected; numbering is consistent across features)
# ======================================================
from pathlib import Path
import csv

# ---- Settings ----
OUT_ROOT = Path("/home/yaya/ai-detect-proj/Script/features_npy")  # root directory of the feature outputs
NEW_PREFIX = "DIV2K"                      # prefix of the new file names
MATCH_PREFIXES = ("DIV2K", "DIV2K_HR")    # old prefixes treated as DIV2K (matched case-insensitively)
DRY_RUN = False                           # True = preview only, do not rename
START_INDEX = 1                           # first number assigned (usually 1)
PAD_WIDTH = None                          # None = automatic; or set e.g. 6 -> 000001

# ---- Collect candidate files (only inside directories whose name ends with "_npy") ----
assert OUT_ROOT.exists(), f"找不到目錄：{OUT_ROOT}"
all_npy = [p for p in OUT_ROOT.rglob("*.npy") if p.parent.name.endswith("_npy")]

def is_div2k_stem(stem: str) -> bool:
    """Return True when ``stem`` starts with any entry of ``MATCH_PREFIXES`` (case-insensitive)."""
    wanted = tuple(pref.upper() for pref in MATCH_PREFIXES)
    return stem.upper().startswith(wanted)

target_files = [p for p in all_npy if is_div2k_stem(p.stem)]
if not target_files:
    raise SystemExit("在 features_npy 中找不到 DIV2K 相關的 .npy 檔；請確認 MATCH_PREFIXES 或路徑。")

# ---- Build the old-stem -> new-stem numbering (identical across feature directories) ----
# Stems are de-duplicated and sorted, so the same image gets the same number
# in every feature directory.
unique_stems = sorted({p.stem for p in target_files})
total = len(unique_stems)
# The width must fit the LARGEST index that will be written, which is
# START_INDEX + total - 1. Sizing from `total` alone gives names of uneven
# width once START_INDEX > 1.
last_index = START_INDEX + total - 1
pad = PAD_WIDTH or max(4, len(str(last_index)))
mapping = {
    stem: f"{NEW_PREFIX}__{idx:0{pad}d}"
    for idx, stem in enumerate(unique_stems, start=START_INDEX)
}

# ---- Plan the renames and check for conflicts ----
import time  # only used to timestamp an extra map file (see below)

plan = []
conflicts = []
for src in target_files:
    new_stem = mapping[src.stem]
    dst = src.with_name(new_stem + src.suffix)
    if dst == src:
        # Already carries its final name (e.g. the cell was re-run): keep it
        # out of the plan so it is neither renamed nor written to the map.
        continue
    plan.append((src, dst))
    if dst.exists():
        conflicts.append((src, dst))

print(f"將改名 {len(plan)} 個檔案；唯一影像數 {len(unique_stems)}；零填位數 {pad}。")
if conflicts:
    print("⚠️ 偵測到會覆蓋既有檔案，為安全起見先中止。衝突範例：")
    for s, d in conflicts[:10]:
        print(" -", d)
    raise SystemExit("請先處理衝突或調整 MATCH_PREFIXES。")

map_csv = OUT_ROOT / f"DIV2K_rename_map_{pad}d.csv"
if not plan:
    # Nothing to do. Leave any existing map alone — it is the only record of
    # the original names.
    print("（所有目標檔案都已是新檔名，無需改名，也不會覆寫既有對照表）")
else:
    # ---- Write the old -> new map as CSV (can be used to restore the old names) ----
    # Never overwrite an earlier map: a re-run used to replace it with the
    # new state, losing the original names. Use a timestamped name instead.
    if map_csv.exists():
        map_csv = map_csv.with_name(f"{map_csv.stem}_{time.strftime('%Y%m%d_%H%M%S')}.csv")
    with open(map_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["old_path","new_path","old_stem","new_stem"])
        for s, d in plan:
            w.writerow([str(s), str(d), s.stem, d.stem])
    print("📝 對照表已輸出：", map_csv)

    # ---- Perform the renames ----
    if not DRY_RUN:
        for s, d in plan:
            s.rename(d)
        print("✅ 已完成改名。")
    else:
        print("（DRY_RUN=True 僅預覽，未實際改名）")


將改名 2700 個檔案；唯一影像數 900；零填位數 4。
📝 對照表已輸出： /home/yaya/ai-detect-proj/Script/features_npy/DIV2K_rename_map_4d.csv
✅ 已完成改名。


In [4]:
# ======================================================
# Rename COCO2017 npy → e.g. "COCO2017__0001.npy" (zero-pad width is auto-selected; numbering is shared across features)
# ======================================================
from pathlib import Path
import csv

# ---- Settings ----
OUT_ROOT = Path("/home/yaya/ai-detect-proj/Script/features_npy")   # root directory of the feature outputs
NEW_PREFIX = "COCO2017"                    # prefix of the new file names
# Prefixes the earlier feature generators are expected to have used (extend or trim as needed)
MATCH_PREFIXES = ("coco2017_ge512", "coco_ge512", "coco2017", "COCO2017")
DRY_RUN = False                            # True = preview only, do not rename
START_INDEX = 1                            # first number assigned
PAD_WIDTH = None                           # None = automatic width; or set e.g. 6 -> 000001

# ---- Collect candidate files (only inside directories whose name ends with "_npy") ----
assert OUT_ROOT.exists(), f"找不到目錄：{OUT_ROOT}"
all_npy = [p for p in OUT_ROOT.rglob("*.npy") if p.parent.name.endswith("_npy")]

def is_coco_stem(stem: str) -> bool:
    """Return True when ``stem`` starts with any entry of ``MATCH_PREFIXES`` (case-insensitive)."""
    wanted = tuple(pref.upper() for pref in MATCH_PREFIXES)
    return stem.upper().startswith(wanted)

target_files = [p for p in all_npy if is_coco_stem(p.stem)]
if not target_files:
    raise SystemExit("在 features_npy 中找不到 COCO2017 相關的 .npy；請確認 MATCH_PREFIXES 或路徑。")

# ---- Build the old-stem -> new-stem numbering (identical across feature directories) ----
# Stems are de-duplicated and sorted, so the same image gets the same number
# in every feature directory.
unique_stems = sorted({p.stem for p in target_files})
total = len(unique_stems)
# The width must fit the LARGEST index that will be written, which is
# START_INDEX + total - 1. Sizing from `total` alone gives names of uneven
# width once START_INDEX > 1.
last_index = START_INDEX + total - 1
pad = PAD_WIDTH or max(4, len(str(last_index)))
mapping = {
    stem: f"{NEW_PREFIX}__{idx:0{pad}d}"
    for idx, stem in enumerate(unique_stems, start=START_INDEX)
}

# ---- Plan the renames and check for conflicts ----
import time  # only used to timestamp an extra map file (see below)

plan, conflicts = [], []
for src in target_files:
    new_stem = mapping[src.stem]
    dst = src.with_name(new_stem + src.suffix)
    if dst == src:
        # Already carries its final name (e.g. the cell was re-run): keep it
        # out of the plan so it is neither renamed nor written to the map.
        continue
    plan.append((src, dst))
    if dst.exists():
        conflicts.append((src, dst))

print(f"將改名 {len(plan)} 個檔案；唯一影像數 {len(unique_stems)}；零填位數 {pad}。")
if conflicts:
    print("⚠️ 偵測到將覆蓋既有檔案，為安全起見先中止。衝突範例：")
    for s, d in conflicts[:10]:
        print(" -", d)
    raise SystemExit("請先處理衝突或調整 MATCH_PREFIXES。")

map_csv = OUT_ROOT / f"COCO2017_rename_map_{pad}d.csv"
if not plan:
    # Nothing to do. Leave any existing map alone — it is the only record of
    # the original names.
    print("（所有目標檔案都已是新檔名，無需改名，也不會覆寫既有對照表）")
else:
    # ---- Write the old -> new map as CSV (can be used to restore the old names) ----
    # Never overwrite an earlier map: a re-run used to replace it with the
    # new state, losing the original names. Use a timestamped name instead.
    if map_csv.exists():
        map_csv = map_csv.with_name(f"{map_csv.stem}_{time.strftime('%Y%m%d_%H%M%S')}.csv")
    with open(map_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["old_path","new_path","old_stem","new_stem"])
        for s, d in plan:
            w.writerow([str(s), str(d), s.stem, d.stem])
    print("📝 對照表已輸出：", map_csv)

    # ---- Perform the renames ----
    if not DRY_RUN:
        for s, d in plan:
            s.rename(d)
        print("✅ 已完成改名。")
    else:
        print("（DRY_RUN=True 僅預覽，未實際改名）")


將改名 26355 個檔案；唯一影像數 8785；零填位數 4。
📝 對照表已輸出： /home/yaya/ai-detect-proj/Script/features_npy/COCO2017_rename_map_4d.csv
✅ 已完成改名。


In [6]:
# ======================================================
# Rename Places365 npy → e.g. "PLACES365__00001.npy" (zero-pad width is auto-selected; numbering is shared across features)
# ======================================================
from pathlib import Path
import csv

# ---- Settings ----
OUT_ROOT = Path("/home/yaya/ai-detect-proj/Script/features_npy")   # root directory of the feature outputs
NEW_PREFIX = "PLACES365"                   # prefix of the new file names
# Prefixes matching the two earlier Places generators (extend or trim as needed)
MATCH_PREFIXES = (
    "Places365_valHR_15k_gt512",
    "Places365_val256_15k",
    "Places365",
    "PLACES365",
)
DRY_RUN = False                            # True = preview only, do not rename
START_INDEX = 1                            # first number assigned
PAD_WIDTH = None                           # None = automatic width; or set e.g. 6 -> 000001

# ---- Collect candidate files (only inside directories whose name ends with "_npy") ----
assert OUT_ROOT.exists(), f"找不到目錄：{OUT_ROOT}"
all_npy = [p for p in OUT_ROOT.rglob("*.npy") if p.parent.name.endswith("_npy")]

def is_places_stem(stem: str) -> bool:
    """Return True when ``stem`` starts with any entry of ``MATCH_PREFIXES`` (case-insensitive)."""
    wanted = tuple(pref.upper() for pref in MATCH_PREFIXES)
    return stem.upper().startswith(wanted)

target_files = [p for p in all_npy if is_places_stem(p.stem)]
if not target_files:
    raise SystemExit("在 features_npy 中找不到 Places365 相關的 .npy；請確認 MATCH_PREFIXES 或路徑。")

# ---- Build the old-stem -> new-stem numbering (identical across feature directories) ----
# Stems are de-duplicated and sorted, so the same image gets the same number
# in every feature directory.
unique_stems = sorted({p.stem for p in target_files})
total = len(unique_stems)
# The width must fit the LARGEST index that will be written, which is
# START_INDEX + total - 1. Sizing from `total` alone gives names of uneven
# width once START_INDEX > 1.
last_index = START_INDEX + total - 1
pad = PAD_WIDTH or max(4, len(str(last_index)))
mapping = {
    stem: f"{NEW_PREFIX}__{idx:0{pad}d}"
    for idx, stem in enumerate(unique_stems, start=START_INDEX)
}

# ---- Plan the renames and check for conflicts ----
import time  # only used to timestamp an extra map file (see below)

plan, conflicts = [], []
for src in target_files:
    new_stem = mapping[src.stem]
    dst = src.with_name(new_stem + src.suffix)
    if dst == src:
        # Already carries its final name (e.g. the cell was re-run): keep it
        # out of the plan so it is neither renamed nor written to the map.
        continue
    plan.append((src, dst))
    if dst.exists():
        conflicts.append((src, dst))

print(f"將改名 {len(plan)} 個檔案；唯一影像數 {len(unique_stems)}；零填位數 {pad}。")
if conflicts:
    print("⚠️ 偵測到將覆蓋既有檔案，為安全起見先中止。衝突範例：")
    for s, d in conflicts[:10]:
        print(" -", d)
    raise SystemExit("請先處理衝突或調整 MATCH_PREFIXES。")

map_csv = OUT_ROOT / f"PLACES365_rename_map_{pad}d.csv"
if not plan:
    # Nothing to do. Leave any existing map alone — it is the only record of
    # the original names.
    print("（所有目標檔案都已是新檔名，無需改名，也不會覆寫既有對照表）")
else:
    # ---- Write the old -> new map as CSV (can be used to restore the old names) ----
    # Never overwrite an earlier map: a re-run used to replace it with the
    # new state, losing the original names. Use a timestamped name instead.
    if map_csv.exists():
        map_csv = map_csv.with_name(f"{map_csv.stem}_{time.strftime('%Y%m%d_%H%M%S')}.csv")
    with open(map_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["old_path","new_path","old_stem","new_stem"])
        for s, d in plan:
            w.writerow([str(s), str(d), s.stem, d.stem])
    print("📝 對照表已輸出：", map_csv)

    # ---- Perform the renames ----
    if not DRY_RUN:
        for s, d in plan:
            s.rename(d)
        print("✅ 已完成改名。")
    else:
        print("（DRY_RUN=True 僅預覽，未實際改名）")


將改名 45000 個檔案；唯一影像數 15000；零填位數 5。
📝 對照表已輸出： /home/yaya/ai-detect-proj/Script/features_npy/PLACES365_rename_map_5d.csv
✅ 已完成改名。
