In [174]:
# ==== CELL 0: CONFIG & DEVICE ====
import os, torch

# Dataset root. Per the original note it follows a Train/Validate/Test layout;
# this section of the notebook only reads the Test split.
ROOT = r"C:/TRAIN/Deep Learning/vietnamese-foods/Images"
root_test = f"{ROOT}/Test"

# Side length passed to transforms.Resize and the DataLoader batch size.
IMG_SIZE = 224
BATCH_SIZE = 32

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


In [175]:
# ==== CELL 1: TRANSFORMS MATCHING TRAINING ====
from torchvision import transforms

# Test-time preprocessing: resize to IMG_SIZE x IMG_SIZE, then convert to a tensor.
test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    # If training applied Normalize(mean, std), the exact same call MUST be enabled here:
    # transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                      std=[0.229, 0.224, 0.225]),
])

In [176]:
# ==== CELL 2: H√ÄM BUILD + G·ªåI H√ÄM => T·∫†O test_loader, class_names ====
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

def build_test_loader(root_test_dir, transform, batch_size):
    """Create the evaluation DataLoader for an ImageFolder-style directory.

    Args:
        root_test_dir: directory handed to ``ImageFolder`` as ``root``.
        transform: transform applied to every image by the dataset.
        batch_size: number of samples per batch.

    Returns:
        ``(loader, class_names, class_to_idx)`` where ``class_names`` is a list
        copy of ``dataset.classes`` and ``class_to_idx`` a dict copy of
        ``dataset.class_to_idx``.
    """
    test_ds = ImageFolder(root=root_test_dir, transform=transform)
    # Fixed iteration order (shuffle=False) so predictions line up with dataset order.
    loader_options = dict(batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    test_dl = DataLoader(test_ds, **loader_options)
    names = [*test_ds.classes]
    name_to_index = {**test_ds.class_to_idx}
    return test_dl, names, name_to_index

# >>> G·ªåI H√ÄM (r·∫•t hay b·ªã qu√™n)
# Build the shared test loader once. class_names follows ImageFolder's class
# order and ds_class_to_idx is the dataset's {class_name: index} mapping.
test_loader, class_names, ds_class_to_idx = build_test_loader(root_test, test_transform, BATCH_SIZE)

# Report the number of classes and the first 33 class names.
print("S·ªë l·ªõp:", len(class_names))
print("V√≠ d·ª• l·ªõp:", class_names[:33])

S·ªë l·ªõp: 33
V√≠ d·ª• l·ªõp: ['Banh beo', 'Banh bot loc', 'Banh can', 'Banh canh', 'Banh chung', 'Banh cuon', 'Banh duc', 'Banh gio', 'Banh khot', 'Banh mi', 'Banh pia', 'Banh tet', 'Banh trang nuong', 'Banh xeo', 'Bun bo Hue', 'Bun dau mam tom', 'Bun mam', 'Bun rieu', 'Bun thit nuong', 'Ca kho to', 'Canh chua', 'Cao lau', 'Chao long', 'Com tam', 'Goi cuon', 'Hu tieu', 'Mi quang', 'Nem chua', 'Pho', 'Xoi xeo', 'banh_da_lon', 'banh_tieu', 'banh_trung_thu']


In [178]:
# === CELL 3: CORE + HELPERS (keeps the class_names loaded in Cell 2) ===
import os, json, numpy as np, torch
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support

# Fail fast when Cell 2 has not been run yet (class_names would be undefined).
assert "class_names" in globals(), "B·∫°n ph·∫£i n·∫°p class_names ·ªü Cell 2 tr∆∞·ªõc cell n√†y!"
NUM_CLASSES = len(class_names)
# Canonical class name -> index lookup, following the order of class_names.
name2idx = {n:i for i,n in enumerate(class_names)}

def make_remap(src_class_to_idx: dict, target_name2idx: dict):
    """Map each dataset label index to the index of the same class in the canonical order.

    Args:
        src_class_to_idx: ``{class_name: dataset_index}`` of the source dataset.
        target_name2idx: ``{class_name: canonical_index}``.

    Returns:
        ``{dataset_index: canonical_index}``.

    Raises:
        KeyError: if a dataset class name is missing from ``target_name2idx``.
    """
    # Invert the dataset mapping first: index -> class name.
    index_to_name = {}
    for class_name, dataset_index in src_class_to_idx.items():
        index_to_name[dataset_index] = class_name

    # Then look every class name up in the canonical mapping.
    remap = {}
    for dataset_index, class_name in index_to_name.items():
        remap[dataset_index] = target_name2idx[class_name]
    return remap


@torch.no_grad()
def collect_logits(model, loader, device, remap=None):
    """Run ``model`` over ``loader`` and gather labels, predictions and probabilities.

    Args:
        model: torch module; its output is passed to ``softmax(dim=1)``, so it
            is expected to return one row of class logits per sample.
        loader: iterable yielding ``(inputs, labels)`` batches.
        device: device the inputs are moved to before the forward pass.
        remap: optional ``{dataset_index: canonical_index}`` mapping applied to
            the labels; when ``None`` the labels are used as they are.

    Returns:
        ``(y_true, y_pred, y_prob)`` as numpy arrays concatenated over all
        batches: true labels, argmax predictions and softmax probabilities.

    Raises:
        ValueError: if ``loader`` yields no batches.
        KeyError: if ``remap`` is given and lacks a label found in the data.
    """
    y_true, y_pred, y_prob = [], [], []

    model.eval()
    # Feed inputs in the dtype of the model's weights. Parameter-less modules
    # have nothing to inspect, so fall back to float32 for them.
    first_param = next(model.parameters(), None)
    model_dtype = first_param.dtype if first_param is not None else torch.float32

    for x, y in loader:
        x = x.to(device=device, dtype=model_dtype)

        if remap is not None:
            # Labels stay on the CPU: they are only compared and stacked.
            y = torch.as_tensor([remap[int(t)] for t in y], dtype=torch.long)
        else:
            y = y.detach().to("cpu", dtype=torch.long)

        logits = model(x)
        prob = torch.softmax(logits, dim=1).cpu().numpy()
        pred = prob.argmax(1)

        y_prob.append(prob)
        y_pred.append(pred)
        y_true.append(y.numpy())

    if not y_true:
        # np.concatenate([]) would raise an opaque "need at least one array" error.
        raise ValueError("collect_logits: the loader is empty, there is nothing to evaluate")

    return np.concatenate(y_true), np.concatenate(y_pred), np.concatenate(y_prob)


def cm_pretty(cm_counts, labels):
    """Row-normalise a confusion matrix of counts.

    Args:
        cm_counts: 2-D array-like of counts, one row per true class.
        labels: unused; kept so existing callers keep working.

    Returns:
        Float array of the same shape where every row is divided by its sum.
        Rows whose sum is zero stay all zeros.
    """
    counts = np.asarray(cm_counts, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # Divide only where the row total is non-zero. This avoids the 0/0
    # RuntimeWarning and the nan_to_num clean-up that came after it.
    return np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)

def draw_cm(cm_counts, run_name, save_dir="images", threshold=0.10, figsize=(12,10), dpi=200):
    """Draw the row-normalised confusion matrix of a run and save it as a PNG.

    Uses the module-level ``class_names`` for the tick labels. A cell is
    annotated with its percentage and raw count when its count is positive and
    it either lies on the diagonal or reaches ``threshold``. The figure is
    written to ``<save_dir>/<run_name>_cm.png`` and then shown.
    """
    import matplotlib.pyplot as plt, seaborn as sns

    os.makedirs(save_dir, exist_ok=True)
    ratios = cm_pretty(cm_counts, class_names)

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    heatmap_options = dict(
        vmin=0, vmax=1, cmap="Blues", square=True,
        xticklabels=class_names, yticklabels=class_names,
        cbar_kws={'shrink': .7},
    )
    sns.heatmap(ratios, ax=ax, **heatmap_options)

    # Annotate the diagonal plus every off-diagonal cell at or above the threshold.
    for row in range(ratios.shape[0]):
        for col in range(ratios.shape[1]):
            wanted = (row == col) or (ratios[row, col] >= threshold)
            if not wanted:
                continue
            if not (cm_counts[row, col] > 0):
                continue
            caption = f"{ratios[row,col]*100:.0f}%\n({cm_counts[row,col]})"
            ax.text(col+0.5, row+0.5, caption,
                    ha="center", va="center", fontsize=7, color="black")

    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title(f"Confusion Matrix (row-norm) ‚Äì {run_name}", pad=10)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=9)
    plt.tight_layout()

    out_png = os.path.join(save_dir, f"{run_name}_cm.png")
    plt.savefig(out_png, dpi=300, bbox_inches="tight")
    plt.show()
    print("‚úì L∆∞u CM:", out_png)


In [179]:
# QUICK CHECK before plotting: test_loader must be iterable and class_names non-empty.
try:
    _ = iter(test_loader)
    assert len(class_names) > 0
    print("‚úÖ test_loader & class_names s·∫µn s√†ng.")
except Exception as e:
    # Any failure (including a NameError for a missing variable) is reported, not raised.
    print("‚ùå L·ªói:", e)


‚úÖ test_loader & class_names s·∫µn s√†ng.


In [180]:
# ==== CELL 4: PLOTS ‚Äì CM & TOP CONFUSIONS ====
import os, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_recall_fscore_support

os.makedirs("images", exist_ok=True)

def plot_confusion(cm_counts, class_names, run_name, save_path, row_norm=True, threshold=0.10, dpi=300):
    """Draw a confusion-matrix heatmap and save it to ``save_path``.

    Args:
        cm_counts: un-normalised count matrix of shape [C, C].
        class_names: tick labels, one per class.
        run_name: run identifier shown in the title.
        save_path: output image path.
        row_norm: when True the heatmap shows row-normalised rates (recall per
            true class); when False it shows the raw counts.
        threshold: off-diagonal cells are annotated only when their
            row-normalised rate is at least this value, to reduce clutter.
        dpi: resolution used when saving.
    """
    counts = np.asarray(cm_counts)
    n_classes = counts.shape[0]

    # Row-normalised rates are always needed for the threshold test. Rows with
    # no samples stay at zero instead of producing 0/0 warnings.
    totals = counts.sum(axis=1, keepdims=True).astype(float)
    rates = np.divide(counts.astype(float), totals,
                      out=np.zeros(counts.shape, dtype=float), where=totals != 0)
    shown = rates if row_norm else counts.astype(float)

    fig, ax = plt.subplots(figsize=(12, 10), dpi=200)
    # Rates live in [0, 1]. Raw counts need a data-driven colour scale,
    # otherwise every non-zero cell saturates at vmax=1.
    sns.heatmap(shown, vmin=0, vmax=1 if row_norm else None, cmap="Blues", square=True,
                xticklabels=class_names, yticklabels=class_names,
                cbar_kws={'shrink': .6}, ax=ax)

    # Annotate the diagonal and every cell whose rate reaches the threshold.
    for i in range(n_classes):
        for j in range(n_classes):
            show = (i == j) or (rates[i, j] >= threshold)
            if show and counts[i, j] > 0:
                if row_norm:
                    caption = f"{rates[i,j]*100:.0f}%\n({counts[i,j]})"
                else:
                    caption = f"{counts[i,j]}"
                ax.text(j + 0.5, i + 0.5, caption,
                        ha="center", va="center", fontsize=7, color="black")

    if row_norm:
        title = f"Confusion Matrix (row-norm) ‚Äì {run_name}"
    else:
        title = f"Confusion Matrix (counts) ‚Äì {run_name}"

    ax.set_xlabel("Predicted", fontsize=12)
    ax.set_ylabel("True", fontsize=12)
    ax.set_title(title, fontsize=14, pad=10)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
    plt.tight_layout()
    plt.savefig(save_path, dpi=dpi, bbox_inches="tight")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def plot_top_confusions(cm_counts, class_names, run_name, save_path, topk=15, dpi=300):
    """Bar chart of the Top-K most confused (true -> predicted) class pairs.

    Pairs are ranked by their row-normalised rate; the diagonal and pairs with
    a zero count are ignored. When no pair remains, a message is printed and
    nothing is drawn. Otherwise the chart is saved to ``save_path`` and shown.
    """
    as_float = cm_counts.astype(float)
    rates = np.nan_to_num(as_float / as_float.sum(axis=1, keepdims=True))

    n_classes = rates.shape[0]
    off_diagonal = [
        (src, dst, rates[src, dst], cm_counts[src, dst])
        for src in range(n_classes)
        for dst in range(n_classes)
        if src != dst and cm_counts[src, dst] != 0
    ]

    # Highest rate first; keep only the first `topk` entries.
    ranked = sorted(off_diagonal, key=lambda rec: rec[2], reverse=True)[:topk]

    if len(ranked) == 0:
        print("Kh√¥ng c√≥ c·∫∑p nh·∫ßm ƒë√°ng k·ªÉ.")
        return

    labels, perc, counts = [], [], []
    for src, dst, rate, count in ranked:
        labels.append(f"{class_names[src]} ‚Üí {class_names[dst]}")
        perc.append(100 * rate)
        counts.append(count)

    fig, ax = plt.subplots(figsize=(12, 7), dpi=200)
    ax.barh(labels, perc)
    ax.invert_yaxis()
    # Write "<percent> (<count>)" just to the right of every bar.
    for position in range(len(perc)):
        ax.text(perc[position] + 0.3, position,
                f"{perc[position]:.1f}% ({counts[position]})", va="center", fontsize=10)
    ax.set_xlabel("T·ªâ l·ªá nh·∫ßm (%)")
    ax.set_title(f"Top-{len(ranked)} c·∫∑p d·ªÖ nh·∫ßm nh·∫•t ‚Äì {run_name}", fontsize=14, pad=8)
    plt.tight_layout()
    plt.savefig(save_path, dpi=dpi, bbox_inches="tight")
    plt.show()


In [None]:
# ==== CELL 5: EVALUATE ONE RUN ====
import os, json, torch
import numpy as np

# B·∫°n c√≥ th·ªÉ ƒë·ªïi ƒë·ªÉ kh·ªõp folder th·∫≠t c·ªßa b·∫°n
# Root folder of the training runs; adjust it to match your real layout.
RUNS_DIR = "Runs"   # each sub-folder is one training run holding a .mtl/.pt/.pth file




from model.mtl_cnn import mtl_cnn_v1
from model.mobilenet_v4 import CustomMobileNetV4  # file b·∫°n ƒë√£ g·ª≠i
from model.efficientnet_b0 import CustomEfficientNetB0

def model_auto(run_name, num_classes=NUM_CLASSES):
    """Pick and build the architecture implied by a run name.

    The lower-cased run name is searched for a marker:
    - contains 'mobilenetv4'                        -> CustomMobileNetV4
    - contains 'efficientnetb0' / 'efficientnet_b0' -> CustomEfficientNetB0
    - anything else (e.g. 'mtl-cnn-...')            -> mtl_cnn_v1, the default

    The selected constructor is called with ``num_classes=num_classes``.
    """
    lowered = run_name.lower()
    efficientnet_tags = ("efficientnetb0", "efficientnet_b0")

    if "mobilenetv4" in lowered:
        builder = CustomMobileNetV4
    elif any(tag in lowered for tag in efficientnet_tags):
        builder = CustomEfficientNetB0
    else:
        # Default: the hand-built CNN.
        builder = mtl_cnn_v1
    return builder(num_classes=num_classes)



# --- REPLACE this in Cell 5 ---
from glob import glob
import os, time

# Lower-case file extensions that find_checkpoint accepts as checkpoints.
EXTS = {".mtl", ".pt", ".pth"}

def find_checkpoint(run_path: str):
    """Locate the most suitable checkpoint file anywhere under ``run_path``.

    Files are accepted when their lower-cased extension is in ``EXTS``. A file
    name containing 'best' scores +100, 'ema' +10 and 'last' +5; ties on the
    score are broken by the most recent modification time.

    Args:
        run_path: run directory to search (recursively).

    Returns:
        Path of the chosen checkpoint, or ``None`` when none is found.
    """
    from glob import escape as _glob_escape  # local import: only `glob` is imported at file level

    # Escape the directory so characters such as '[' or '*' in a run name are
    # taken literally. One recursive pattern covers the run folder and all
    # sub-folders, so every file is listed exactly once.
    pattern = os.path.join(_glob_escape(os.fspath(run_path)), "**", "*")
    cands = [
        p for p in glob(pattern, recursive=True)
        if os.path.isfile(p) and os.path.splitext(p)[1].lower() in EXTS
    ]

    if not cands:
        return None

    def score(p):
        # (keyword score, mtime): the keyword score dominates, newer files win ties.
        name = os.path.basename(p).lower()
        s = 0
        if "best" in name: s += 100
        if "ema"  in name: s += 10
        if "last" in name: s += 5
        return (s, os.path.getmtime(p))

    chosen = max(cands, key=score)
    print(f"  ‚úì checkpoint: {os.path.relpath(chosen, run_path)}")
    return chosen

def load_checkpoint(run_path_or_name, ckpt_path, device):
    """Build (or reuse) a model, load checkpoint weights into it and return it in eval mode.

    Args:
        run_path_or_name: either a run path / run name (``str``, ``bytes`` or
            ``os.PathLike``), in which case the architecture is chosen by
            ``model_auto`` from the last path component, or an already
            constructed model object, which is used as is.
        ckpt_path: checkpoint file to read with ``torch.load``.
        device: device the model and the checkpoint tensors are mapped to.

    Returns:
        The model with the weights loaded, converted to float32, moved to
        ``device`` and switched to ``eval()``.

    Raises:
        FileNotFoundError: if ``ckpt_path`` is ``None``.
    """
    if ckpt_path is None:
        raise FileNotFoundError("load_checkpoint: ckpt_path is None (no checkpoint was found)")

    if isinstance(run_path_or_name, (str, bytes, os.PathLike)):
        # normpath drops trailing '/' and '\\' alike; fsdecode handles bytes and PathLike.
        run_name = os.path.basename(os.path.normpath(os.fsdecode(run_path_or_name)))
        model = model_auto(run_name)
    else:
        # A ready-made model object was passed in.
        model = run_path_or_name

    model = model.to(device).eval()

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state = torch.load(ckpt_path, map_location=device)

    # Checkpoints come in several layouts: unwrap the first known container key,
    # otherwise treat the loaded object itself as the state dict.
    sd = state
    if isinstance(state, dict):
        for key in ("state_dict", "net", "classification_best"):
            if key in state:
                sd = state[key]
                break
        else:
            if isinstance(state.get("model"), dict):
                sd = state["model"]

    # strict=False tolerates key mismatches; report them instead of hiding them.
    missing, unexpected = model.load_state_dict(sd, strict=False)
    if missing or unexpected:
        print(f"‚ö†Ô∏è  load_state_dict warning ‚Üí missing: {missing}, unexpected: {unexpected}")

    model = model.float().to(device).eval()
    return model


def evaluate_model_at(run_path, run_name):
    """Evaluate the checkpoint of one run on the test set (macro-averaged metrics).

    Args:
        run_path: directory of the run; searched with ``find_checkpoint``.
        run_name: label stored in the result under ``"run"``.

    Returns:
        ``{"run", "acc", "precision", "recall", "f1"}``, or ``None`` when the
        run has no checkpoint file.
    """
    # 1) Pick the checkpoint first so runs without one are skipped cheaply.
    #    find_checkpoint is the finder defined in this cell.
    ckpt_path = find_checkpoint(run_path)
    if ckpt_path is None:
        print(f"  [!] {run_name}: no checkpoint (.mtl/.pt/.pth) found, skipping.")
        return None

    # 2) build_test_loader returns (loader, class_names, class_to_idx): unpack it
    #    instead of treating the tuple as a loader.
    test_loader, _, ds_class_to_idx = build_test_loader(root_test, test_transform, batch_size=BATCH_SIZE)

    # 3) Remap dataset label indices onto the canonical class_names order.
    name2idx = {n: i for i, n in enumerate(class_names)}
    remap = {src_idx: name2idx[name] for name, src_idx in ds_class_to_idx.items()}

    # 4) Load the model on the right device, in eval mode.
    model = load_checkpoint(run_path, ckpt_path, device)

    # 5) Collect labels, predictions and probabilities.
    y_true, y_pred, y_prob = collect_logits(model, test_loader, device, remap=remap)

    # 6) Metrics (macro average over classes).
    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="macro", zero_division=0)
    return {"run": run_name, "acc": acc, "precision": prec, "recall": rec, "f1": f1}


In [None]:

# ==== CELL 6: EVALUATE ALL RUNS & RANKING (robust loader tuple) ====
import os, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

RUNS_DIR   = "Runs"
IMAGES_DIR = "images"
os.makedirs(IMAGES_DIR, exist_ok=True)

# Reuse the class_names loaded by an earlier cell; otherwise try the fallback file.
if "class_names" not in globals():
    # Use class_names.json under ROOT when it exists.
    fallback_json = os.path.join(ROOT, "class_names.json")
    if os.path.isfile(fallback_json):
        # Context manager so the file handle is closed right after parsing.
        with open(fallback_json, "r", encoding="utf-8") as fh:
            class_names = json.load(fh)
    else:
        raise RuntimeError("B·∫°n ch∆∞a n·∫°p class_names. H√£y ch·∫°y cell n·∫°p class_names ho·∫∑c cung c·∫•p file class_names.json.")

NUM_CLASSES = len(class_names)

# Cache filled by evaluate_model_at so a later ROC cell can compare several models.
if "ROC_CACHE" not in globals():
    ROC_CACHE = []

def load_run_config(run_path, default_img=224, default_bs=64):
    """Read the image size and batch size of a run from ``<run_path>/config.json``.

    The keys ``img_size`` / ``IMG_SIZE`` and ``batch_size`` / ``BATCH_SIZE``
    are looked up in that order. Reading is best effort: a missing file returns
    the defaults silently, while an unreadable or malformed file prints a
    warning and keeps the default for every value not parsed yet.

    Args:
        run_path: run directory.
        default_img: image size used when the config does not provide one.
        default_bs: batch size used when the config does not provide one.

    Returns:
        ``(img_size, batch_size)`` as a tuple.
    """
    cfg_path = os.path.join(run_path, "config.json")
    img, bs = default_img, default_bs
    if not os.path.isfile(cfg_path):
        return img, bs

    try:
        # Context manager so the file handle is always closed.
        with open(cfg_path, "r", encoding="utf-8") as fh:
            cfg = json.load(fh)
        img = int(cfg.get("img_size", cfg.get("IMG_SIZE", img)))
        bs = int(cfg.get("batch_size", cfg.get("BATCH_SIZE", bs)))
    except Exception as exc:
        # Still best effort, but no longer silent: say why the defaults are used.
        print(f"  [!] Could not read {cfg_path} ({exc!r}); falling back to default values.")
    return img, bs

def create_test_transform(img_size):
    """Build the test-time transform: resize to ``img_size`` x ``img_size``, then ``ToTensor``."""
    target_size = (img_size, img_size)
    steps = [
        transforms.Resize(target_size),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)

def pick_checkpoint(run_path):
    """Return the newest checkpoint of a run, preferring ``.mtl`` files.

    Looks in ``<run_path>/checkpoints`` and then in ``run_path`` itself (not
    recursively) for names ending in .mtl/.pt/.pth, case-insensitively. When
    any ``.mtl`` entry exists only those compete. The entry with the latest
    modification time wins. Returns ``None`` when nothing matches.
    """
    suffixes = (".mtl", ".pt", ".pth")
    search_dirs = (os.path.join(run_path, "checkpoints"), run_path)

    found = [
        os.path.join(folder, entry)
        for folder in search_dirs
        if os.path.isdir(folder)
        for entry in os.listdir(folder)
        if entry.lower().endswith(suffixes)
    ]
    if len(found) == 0:
        return None

    preferred = [path for path in found if path.lower().endswith(".mtl")]
    pool = preferred if preferred else found
    # Oldest first, so the last element is the most recently modified one.
    by_age = sorted(pool, key=os.path.getmtime)
    return by_age[-1]

def normalize_loader_tuple(loader_out):
    """Normalise the result of ``build_test_loader`` to ``(test_loader, ds_class_to_idx)``.

    Accepts either a bare loader or a tuple whose first element is the loader.
    In the tuple case the remaining elements are scanned for the first
    non-empty dict that looks like ``{class_name(str): index(int)}``, or like
    ``{index(int): class_name(str)}``, which is inverted. When no such dict is
    found, ``test_loader.dataset.class_to_idx`` is used, and ``{}`` as a last
    resort.
    """
    ds_class_to_idx = None
    if isinstance(loader_out, tuple):
        # The first element is taken to be the loader.
        test_loader = loader_out[0]
        for item in loader_out[1:]:
            # Skip empty dicts: they satisfy every all(...) test vacuously and
            # would block the dataset fallback below.
            if not isinstance(item, dict) or not item:
                continue
            # class_to_idx layout: str keys, int values.
            if all(isinstance(k, str) for k in item.keys()) and all(isinstance(v, int) for v in item.values()):
                ds_class_to_idx = item
                break
            # idx2name layout: int keys, str values -> invert it.
            if all(isinstance(k, int) for k in item.keys()) and all(isinstance(v, str) for v in item.values()):
                ds_class_to_idx = {v: k for k, v in item.items()}
                break
    else:
        test_loader = loader_out

    # Fallback: read the mapping from the loader's dataset.
    if ds_class_to_idx is None:
        ds_class_to_idx = getattr(getattr(test_loader, "dataset", object()), "class_to_idx", {})
        if not isinstance(ds_class_to_idx, dict):
            ds_class_to_idx = {}

    return test_loader, ds_class_to_idx

def evaluate_model_at(run_path, run_name):
    """Evaluate one run on the test set and cache its probabilities for ROC plots.

    Args:
        run_path: directory of the run (config.json and checkpoint files).
        run_name: run identifier; also used to choose the architecture.

    Returns:
        Dict with ``run``, ``acc``, weighted ``precision`` / ``recall`` /
        ``f1``, ``img_size``, ``batch_size`` and ``ckpt`` (checkpoint file
        name), or ``None`` when the run has no checkpoint.

    Side effects:
        Stores ``{"name", "y_true", "y_prob", "class_names"}`` in ``ROC_CACHE``,
        replacing any earlier entry for the same run name.
    """
    # 1) Look for a checkpoint first, so runs without one are skipped cheaply.
    ckpt_path = pick_checkpoint(run_path)
    if ckpt_path is None:
        print(f"  ‚ö† {run_name}: kh√¥ng t√¨m th·∫•y checkpoint (.mtl/.pt/.pth) ‚áí b·ªè qua.")
        return None

    # 2) Per-run settings and the matching transform.
    img_size, bs = load_run_config(run_path, default_img=224, default_bs=64)
    test_transform = create_test_transform(img_size)

    # 3) Build the loader (the builder may return a tuple).
    loader_out = build_test_loader(root_test, test_transform, batch_size=bs)
    test_loader, ds_class_to_idx = normalize_loader_tuple(loader_out)

    # 4) Remap labels onto the canonical class_names order. With no class
    #    mapping there is nothing to remap; an empty remap dict would make
    #    every label lookup fail with KeyError.
    name2idx = {n: i for i, n in enumerate(class_names)}
    can_remap = 'make_remap' in globals() and bool(ds_class_to_idx)
    remap = make_remap(ds_class_to_idx, name2idx) if can_remap else None

    # 5) Build the architecture once and hand the object to load_checkpoint,
    #    instead of building a model that is thrown away.
    model = load_checkpoint(model_auto(run_name), ckpt_path, device)

    # 6) Predict.
    y_true, y_pred, y_prob = collect_logits(model, test_loader, device, remap=remap)

    # 7) Metrics (weighted average over classes).
    acc = float(accuracy_score(y_true, y_pred))
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted", zero_division=0)

    # 8) Cache for the multi-model ROC cell. Drop a stale entry of the same run
    #    so re-running the cell does not pile up duplicates.
    ROC_CACHE[:] = [entry for entry in ROC_CACHE if entry.get("name") != run_name]
    ROC_CACHE.append({
        "name": run_name,
        "y_true": np.asarray(y_true),
        "y_prob": np.asarray(y_prob),
        "class_names": class_names,
    })

    return {
        "run": run_name, "acc": acc, "precision": float(prec), "recall": float(rec), "f1": float(f1),
        "img_size": img_size, "batch_size": bs, "ckpt": os.path.basename(ckpt_path)
    }

# ==== Scan RUNS_DIR and evaluate every run ====
results = []
if not os.path.isdir(RUNS_DIR):
    print(f"‚ö† Th∆∞ m·ª•c {RUNS_DIR} kh√¥ng t·ªìn t·∫°i.")
else:
    # Every sub-directory of RUNS_DIR is treated as one run; plain files are skipped.
    for run_name in sorted(os.listdir(RUNS_DIR)):
        run_path = os.path.join(RUNS_DIR, run_name)
        if not os.path.isdir(run_path):
            continue
        print(f"‚ñ∂Ô∏è  ƒê√°nh gi√°: {run_name}")
        res = evaluate_model_at(run_path, run_name)
        # evaluate_model_at returns None for runs without a checkpoint.
        if res:
            results.append(res)

# ==== Ranking table, also saved as an image ====
if results:
    # Best accuracy first.
    df = pd.DataFrame(results).sort_values("acc", ascending=False)
    display(df)

    # Render the same table with matplotlib so it can be saved as a PNG;
    # the figure height grows with the number of rows.
    fig, ax = plt.subplots(figsize=(10, 0.45*len(df)+1), dpi=220)
    ax.axis("off")
    tbl = ax.table(cellText=df.values, colLabels=df.columns, loc="center", cellLoc="center")
    tbl.auto_set_font_size(False); tbl.set_fontsize(9); tbl.scale(1, 1.25)
    plt.tight_layout()
    out_png = os.path.join(IMAGES_DIR, "summary_models.png")
    plt.savefig(out_png, dpi=300, bbox_inches="tight")
    plt.show()
    print(f"‚úÖ L∆∞u: {out_png}")
else:
    print("‚ö† Kh√¥ng c√≥ run h·ª£p l·ªá trong th∆∞ m·ª•c Runs/")


‚ñ∂Ô∏è  ƒê√°nh gi√°: mtl-cnn-20251029-201543


  state = torch.load(ckpt_path, map_location=device)


In [None]:
# ==== CELL 7 (REPLACE): V·∫Ω Loss/Accuracy CHO T·∫§T C·∫¢ C√ÅC RUNS ====
import os, re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

RUNS_DIR = "Runs"          # keep this equal to the runs folder you actually use
IMAGES_DIR = "images"
# Make sure the output folder for the history plots exists.
os.makedirs(IMAGES_DIR, exist_ok=True)

def pick_col(df, pats):
    """Return the first column whose name matches any pattern, ignoring case.

    Args:
        df: object exposing ``.columns`` (e.g. a pandas DataFrame).
        pats: iterable of regular-expression strings, tried with ``re.search``.

    Returns:
        The original column label of the first match in column order, or
        ``None`` when no column matches.
    """
    # Compile with IGNORECASE rather than lower-casing the pattern text:
    # lower-casing would turn escapes such as \S or \D into \s or \d.
    compiled = [re.compile(p, re.IGNORECASE) for p in pats]
    for c in df.columns:
        # str() lets non-string column labels (e.g. integers) be searched too.
        label = str(c)
        if any(rx.search(label) for rx in compiled):
            return c
    return None

def plot_history_for_run(run_path, run_name):
    """Plot the loss and accuracy curves of one run from its training log.

    The first existing file among history.csv, history.json, train_log.csv and
    metrics.csv inside ``run_path`` is read. The figure (loss on the left,
    accuracy on the right) is saved to ``IMAGES_DIR/<run_name>_history.png``
    and shown. When no log file exists a message is printed and the function
    returns without plotting.
    """
    # 1) Find the first usable log file.
    for cand in ["history.csv", "history.json", "train_log.csv", "metrics.csv"]:
        hp = os.path.join(run_path, cand)
        if os.path.isfile(hp):
            hist_path = hp
            break
    else:
        # for/else: reached only when no candidate file was found.
        print(f"[-] {run_name}: kh√¥ng th·∫•y history.(csv|json)")
        return

    # 2) Read the log and strip whitespace from the column names.
    if hist_path.endswith(".json"):
        df = pd.read_json(hist_path)
    else:
        df = pd.read_csv(hist_path)
    df = df.copy()
    df.columns = [c.strip() for c in df.columns]

    # 3) Use the epoch column when present, otherwise number the rows 1..len(df).
    c_epoch = pick_col(df, [r"^epoch$", r"^epochs?$"])
    epoch = df[c_epoch].to_numpy() if c_epoch else np.arange(1, len(df)+1)

    # 4) Loosely match the loss/accuracy columns by name.
    # NOTE(review): r"accuracy$" also matches names such as "val_accuracy", and
    # pick_col returns the first matching column, so the train-accuracy pick
    # depends on column order -- confirm against the real log schema.
    c_tr_loss = pick_col(df, [r"^loss$", r"train.*loss"])
    c_va_loss = pick_col(df, [r"val.*loss", r"valid.*loss"])
    c_tr_acc  = pick_col(df, [r"^acc$", r"accuracy$", r"train.*acc", r"train.*accuracy"])
    c_va_acc  = pick_col(df, [r"val.*acc", r"val.*accuracy", r"valid.*acc", r"valid.*accuracy"])

    tr_loss = df[c_tr_loss].to_numpy() if c_tr_loss else None
    va_loss = df[c_va_loss].to_numpy() if c_va_loss else None
    tr_acc  = df[c_tr_acc].to_numpy()  if c_tr_acc  else None
    va_acc  = df[c_va_acc].to_numpy()  if c_va_acc  else None

    # 5) Best epoch to annotate: highest val accuracy, else lowest val loss.
    best_ep, note = None, ""
    if va_acc is not None and len(va_acc) > 0:
        best_ep = int(epoch[np.nanargmax(va_acc)])
        note = f"best val_acc@{best_ep}={np.nanmax(va_acc):.3f}"
    elif va_loss is not None and len(va_loss) > 0:
        best_ep = int(epoch[np.nanargmin(va_loss)])
        note = f"best val_loss@{best_ep}={np.nanmin(va_loss):.3f}"

    # 6) Draw the two subplots.
    fig, ax = plt.subplots(1, 2, figsize=(14, 5), dpi=160)

    # Loss (left); the dashed vertical line marks the best epoch.
    if tr_loss is not None: ax[0].plot(epoch, tr_loss, label="Train loss")
    if va_loss is not None: ax[0].plot(epoch, va_loss, label="Val loss")
    if best_ep is not None: ax[0].axvline(best_ep, ls="--", lw=1, c="gray")
    ax[0].set_title(f"{run_name} ‚Äì Loss")
    ax[0].set_xlabel("Epoch"); ax[0].set_ylabel("Loss"); ax[0].legend()

    # Accuracy (right); the note is used as legend label only when a
    # validation-accuracy column was found.
    if tr_acc is not None: ax[1].plot(epoch, tr_acc, label="Train acc")
    if va_acc is not None: ax[1].plot(epoch, va_acc, label="Val acc")
    if best_ep is not None:
        ax[1].axvline(best_ep, ls="--", lw=1, c="gray", label=note if va_acc is not None else None)
    ax[1].set_title(f"{run_name} ‚Äì Accuracy")
    ax[1].set_xlabel("Epoch"); ax[1].set_ylabel("Accuracy"); ax[1].legend()

    plt.tight_layout()
    out_png = os.path.join(IMAGES_DIR, f"{run_name}_history.png")
    plt.savefig(out_png, dpi=300, bbox_inches="tight")
    plt.show()
    print(f"‚úì ƒê√£ l∆∞u: {out_png}")

# üîÅ DUY·ªÜT H·∫æT T·∫§T C·∫¢ RUNS v√† v·∫Ω
# Walk every run directory under RUNS_DIR and plot its training history.
if not os.path.isdir(RUNS_DIR):
    print(f"Th∆∞ m·ª•c '{RUNS_DIR}' kh√¥ng t·ªìn t·∫°i.")
else:
    for run_name in sorted(os.listdir(RUNS_DIR)):
        run_path = os.path.join(RUNS_DIR, run_name)
        # Only sub-directories are runs; plain files are skipped.
        if not os.path.isdir(run_path):
            continue
        print(f"‚Üí V·∫Ω l·ªãch s·ª≠: {run_name}")
        plot_history_for_run(run_path, run_name)


In [None]:
# ==== CELL 8A: PH√ÇN T√çCH M√î H√åNH CHUY√äN S√ÇU ====
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import label_binarize
import seaborn as sns
import torch

# Output folder for the figures produced by the analysis cells.
IMAGES_DIR = "images"
os.makedirs(IMAGES_DIR, exist_ok=True)

# Both names must already exist in the kernel; in this notebook they are
# created by the build_test_loader call in Cell 2.
assert "class_names" in globals(), "‚ö†Ô∏è C·∫ßn n·∫°p class_names tr∆∞·ªõc Cell 8!"
assert "test_loader" in globals(), "‚ö†Ô∏è C·∫ßn build test_loader tr∆∞·ªõc Cell 8!"


In [None]:
# CELL 8B: Bi·ªÉu ƒë·ªì ROC Curve & AUC (multi-class, one-vs-rest)
def plot_roc_auc_for_model(run_name, y_true, y_prob, class_names):
    """Plot one-vs-rest ROC curves and report the macro AUC of a multi-class model.

    Args:
        run_name: run identifier used in the title and the output file name.
        y_true: true integer labels.
        y_prob: per-class scores, indexed as ``y_prob[:, class]``.
        class_names: class labels; their count defines the number of classes.

    The curves of the first five classes are drawn to keep the figure
    readable, and the figure is saved to ``IMAGES_DIR/<run_name>_roc_auc.png``.
    """
    class_ids = range(len(class_names))
    onehot = label_binarize(y_true, classes=class_ids)

    # One (fpr, tpr, auc) triple per class, in class order.
    curves = []
    for cls in class_ids:
        cls_fpr, cls_tpr, _ = roc_curve(onehot[:, cls], y_prob[:, cls])
        curves.append((cls_fpr, cls_tpr, auc(cls_fpr, cls_tpr)))

    # Macro-averaged AUC over all classes.
    macro_auc = roc_auc_score(onehot, y_prob, average="macro")
    print(f"‚Üí {run_name}: macro AUC = {macro_auc:.4f}")

    plt.figure(figsize=(7, 6), dpi=140)
    for cname, (cls_fpr, cls_tpr, cls_auc) in zip(class_names[:5], curves):
        plt.plot(cls_fpr, cls_tpr, label=f"{cname} (AUC = {cls_auc:.3f})")
    plt.plot([0, 1], [0, 1], "k--", label="Random")
    plt.title(f"ROC Curves ‚Äì {run_name}\nMacro AUC = {macro_auc:.3f}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(fontsize=8)
    plt.tight_layout()

    out_png = os.path.join(IMAGES_DIR, f"{run_name}_roc_auc.png")
    plt.savefig(out_png, dpi=300, bbox_inches="tight")
    plt.show()
    print("‚úì ƒê√£ l∆∞u:", out_png)


In [None]:
# (C) T·ªëc ƒë·ªô & hi·ªáu nƒÉng Inferencing
import time

def benchmark_model(model, device, input_size=(1, 3, 224, 224), repeat=30):
    """Time the forward pass of ``model`` on a random input and estimate FPS.

    Args:
        model: torch module, expected to live on ``device`` already.
        device: ``torch.device`` or device string the input is created on.
        input_size: shape of the random input tensor (batch of 1 by default).
        repeat: number of timed forward passes; must be at least 1.

    Returns:
        ``(avg_seconds_per_forward, fps, parameter_count)``.

    Raises:
        ValueError: if ``repeat`` is smaller than 1.
    """
    if repeat < 1:
        raise ValueError("benchmark_model: repeat must be >= 1")

    device = torch.device(device)  # accept plain strings such as "cpu" as well
    model.eval()

    # Create the input in the dtype of the model's weights (float32 when the
    # module has no parameters).
    first_param = next(model.parameters(), None)
    dtype = first_param.dtype if first_param is not None else torch.float32
    x = torch.randn(input_size).to(device=device, dtype=dtype)

    with torch.no_grad():
        # Warm-up passes are excluded from the measurement.
        for _ in range(5):
            _ = model(x)
        if device.type == "cuda":
            torch.cuda.synchronize()

        # perf_counter is the high-resolution clock. time.time() can be coarse
        # enough to report zero elapsed time for a short benchmark.
        t0 = time.perf_counter()
        for _ in range(repeat):
            _ = model(x)
        if device.type == "cuda":
            # Wait for queued GPU work so the elapsed time is real.
            torch.cuda.synchronize()
        t1 = time.perf_counter()

    avg = (t1 - t0) / repeat
    fps = 1 / avg if avg > 0 else float("inf")
    params = sum(p.numel() for p in model.parameters())
    print(f"‚è±Ô∏è {model.__class__.__name__}: {avg*1000:.2f} ms/infer ({fps:.1f} FPS) | Params ‚âà {params/1e6:.2f} M")
    return avg, fps, params


In [None]:
# ==== CELL 9: ·∫¢NH TEST B·ªä NH·∫¶M NHI·ªÄU NH·∫§T (Top Confused Pairs) ====
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from PIL import Image
import io

# Output folder for the figure saved by show_top_confused_pairs (created if missing).
IMAGES_DIR = "images"
os.makedirs(IMAGES_DIR, exist_ok=True)

def show_top_confused_pairs(model, test_loader, class_names, run_name, device, top_k=6):
    """Show the ``top_k`` most confused (true -> predicted) class pairs with one example image each.

    Pairs are ranked by their row-normalised confusion rate. Each figure row
    shows one misclassified test image on the left and the pair's statistics
    on the right. The figure is saved to
    ``IMAGES_DIR/<run_name>_top_confused_pairs.png``.

    Image paths are taken from ``test_loader.dataset.samples`` (or ``.imgs``)
    in dataset order, so the loader must iterate without shuffling.
    """
    model.eval()
    y_true, y_pred = [], []

    # Step 1: collect true labels and predictions.
    with torch.no_grad():
        for x, y in test_loader:
            outputs = model(x.to(device))
            y_pred.extend(outputs.argmax(1).cpu().numpy())
            y_true.extend(y.numpy())
    y_true, y_pred = np.array(y_true), np.array(y_pred)

    # File paths in dataset order, when the dataset exposes them.
    dataset = getattr(test_loader, "dataset", None)
    records = getattr(dataset, "samples", None) or getattr(dataset, "imgs", None)
    if records is not None and len(records) >= len(y_true):
        paths = [rec[0] for rec in records[:len(y_true)]]
    else:
        paths = ["(Kh√¥ng c√≥ ƒë∆∞·ªùng d·∫´n)"] * len(y_true)

    # Fix the label set so both matrices are always C x C, even when a class
    # is absent from the labels and the predictions.
    labels = np.arange(len(class_names))
    cm = confusion_matrix(y_true, y_pred, labels=labels, normalize="true")
    cm_counts = confusion_matrix(y_true, y_pred, labels=labels)

    # Step 2: rank the off-diagonal pairs by confusion rate.
    pairs = []
    for i in range(len(class_names)):
        for j in range(len(class_names)):
            if i != j and cm[i, j] > 0:
                pairs.append((i, j, cm[i, j], cm_counts[i, j]))
    top_pairs = sorted(pairs, key=lambda x: x[2], reverse=True)[:top_k]

    if not top_pairs:
        # plt.subplots(0, 2) would fail; there is nothing to draw anyway.
        print(f"{run_name}: no misclassified pairs to show.")
        return

    # Step 3: one figure row per pair. squeeze=False keeps `axes` 2-D even for
    # a single row, so axes[idx] always yields (image axis, text axis).
    fig, axes = plt.subplots(len(top_pairs), 2, figsize=(7, 3 * len(top_pairs)), squeeze=False)
    fig.suptitle(f"·∫¢nh test th·ª±c s·ª± b·ªã model nh·∫ßm (Top confused pairs) ‚Äì {run_name}",
                 fontsize=14, fontweight="bold", y=0.995)

    for idx, (i, j, rate, count) in enumerate(top_pairs):
        # First test sample with true class i that was predicted as j.
        candidates = np.where((y_true == i) & (y_pred == j))[0]
        img_path = paths[candidates[0]] if len(candidates) > 0 else None

        # Column 1: example image.
        ax_img, ax_text = axes[idx]
        ax_img.axis("off")
        if img_path and os.path.exists(img_path):
            # Context manager closes the file; convert() returns a loaded copy.
            with Image.open(img_path) as handle:
                img = handle.convert("RGB")
            ax_img.imshow(img)
        else:
            ax_img.text(0.5, 0.5, "Kh√¥ng c√≥ ·∫£nh", ha="center", va="center", fontsize=10)

        # Column 2: confusion details.
        ax_text.axis("off")
        text = (
            f"·∫¢nh b·ªã nh·∫ßm: {class_names[i]} ‚Üí {class_names[j]}\n\n"
            f"T·ªâ l·ªá nh·∫ßm (CM): {rate*100:.2f}% ({count} m·∫´u)\n"
            f"True: {class_names[i]}\nPred: {class_names[j]}"
        )
        ax_text.text(0, 0.5, text, va="center", fontsize=11)

    plt.tight_layout()
    out_path = os.path.join(IMAGES_DIR, f"{run_name}_top_confused_pairs.png")
    plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"‚úì ƒê√£ l∆∞u: {out_path}")


In [None]:
# Gi·∫£ s·ª≠ b·∫°n ƒë√£ c√≥ results [] t·ª´ Cell 6 ‚Äì ch·ª©a t√™n m·ªói run
# Per-run deep-dive. `results` is the list built in Cell 6; each entry holds the run name.
for r in results:
    run_name = r["run"]
    run_path = os.path.join("Runs", run_name)
    ckpt = pick_checkpoint(run_path)
    model = load_checkpoint(run_name, ckpt, device)
    print(f"\n=== ƒê√ÅNH GI√Å M√î H√åNH {run_name} ===")

    # 1) Inference: collect labels, predictions and probabilities.
    # NOTE(review): this uses the global test_loader built with IMG_SIZE and no
    # remap, while Cell 6 rebuilds the loader from each run's config -- confirm
    # that the two agree for every run.
    y_true, y_pred, y_prob = collect_logits(model, test_loader, device)

    # 2) ROC curves and AUC.
    plot_roc_auc_for_model(run_name, y_true, y_prob, class_names)


    # 3) Visualise the top confused pairs with example images.
    show_top_confused_pairs(model, test_loader, class_names, run_name, device, top_k=6)

    # 4) Speed benchmark on a fixed 1x3x224x224 input.
    benchmark_model(model, device, (1, 3, 224, 224))
