## Install & Imports

In [None]:
!pip install -q timm mediapipe pillow-heif

import os
import json
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T

from pillow_heif import register_heif_opener
from PIL import Image
import cv2
import mediapipe as mp

import matplotlib.pyplot as plt
import seaborn as sns

# Silence every warning category so the notebook output stays readable.
warnings.filterwarnings("ignore")
# Register the pillow-heif plugin so PIL's Image.open can decode HEIC/HEIF files.
register_heif_opener()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


## Konfigurasi Path & Parameter

In [None]:
# === TEST DATA ===
TEST_DIR   = Path("./Test")      # folder containing the test images
TEST_CSV   = Path("./test.csv")  # CSV with columns: filename,label

# === MODEL & MAPPING ===
MODEL_DIR  = Path("./models")
DENSE_WEIGHTS_PATH = MODEL_DIR / "densenet121_facecls.pth"
LABEL_MAPPING_PATH = MODEL_DIR / "label_mapping.json"

# === OUTPUT ===
# Created eagerly so later cells can write result files into it.
WORK_DIR   = Path("./test_outputs")
WORK_DIR.mkdir(parents=True, exist_ok=True)

IMG_SIZE     = 224   # side length (pixels) images are resized to
BATCH_SIZE   = 32
NUM_WORKERS  = 0     # 0 = load data in the main process
RANDOM_SEED  = 42

# Seed NumPy and PyTorch (plus all CUDA devices when running on GPU).
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if device.type == "cuda":
    torch.cuda.manual_seed_all(RANDOM_SEED)

# File extensions (lower-case, with leading dot) accepted as test images.
VALID_EXTS = [".jpg", ".jpeg", ".png", ".webp", ".bmp", ".jfif", ".heic", ".heif"]

print("TEST_DIR :", TEST_DIR.resolve())
print("TEST_CSV :", TEST_CSV.resolve())
print("MODEL_DIR:", MODEL_DIR.resolve())


## Load Mapping, test.csv, Validasi Label, Build Test Paths

In [None]:
# --- Make sure the required inputs exist before doing any work ---
# Explicit raises instead of `assert`, so the checks still run under `python -O`.
if not LABEL_MAPPING_PATH.exists():
    raise FileNotFoundError(f"Tidak menemukan {LABEL_MAPPING_PATH}")
if not TEST_DIR.exists():
    raise FileNotFoundError(f"Tidak menemukan folder Test: {TEST_DIR}")
if not TEST_CSV.exists():
    raise FileNotFoundError(f"Tidak menemukan file test.csv: {TEST_CSV}")

# --- Load the class <-> index mapping saved at training time ---
with open(LABEL_MAPPING_PATH, "r", encoding="utf-8") as f:
    mapping = json.load(f)

# JSON object keys are always strings, so normalise indices back to int.
class2idx = {k: int(v) for k, v in mapping["class2idx"].items()}
idx2class = {int(k): v for k, v in mapping["idx2class"].items()}
num_classes = len(class2idx)

print("Total kelas (mapping):", num_classes)
print("Contoh mapping:", list(class2idx.items())[:5])

# --- Load test.csv ---
# Read both columns as text up front: with dtype inference a numeric-looking
# label or filename such as "007" would be parsed as 7 and no longer match
# the mapping keys / files on disk after the later str cast.
df_test = pd.read_csv(TEST_CSV, dtype={"filename": str, "label": str})
if not {"filename", "label"}.issubset(df_test.columns):
    raise ValueError("test.csv harus memiliki kolom 'filename' dan 'label'.")

df_test["filename"] = df_test["filename"].astype(str)
df_test["label"]    = df_test["label"].astype(str)

# --- VALIDATION: every label in test.csv must exist in the training mapping ---
labels_in_test    = set(df_test["label"].tolist())
labels_in_mapping = set(class2idx.keys())

unknown_labels = sorted(labels_in_test - labels_in_mapping)

print(f"[INFO] Total label unik di test.csv   : {len(labels_in_test)}")
print(f"[INFO] Total label unik di mapping    : {len(labels_in_mapping)}")

if unknown_labels:
    print("\n[WARNING] Terdapat label di test.csv yang TIDAK ditemukan di mapping training:")
    for lbl in unknown_labels:
        # repr() makes stray whitespace / invisible characters visible.
        print("   -", repr(lbl))

    raise ValueError(
        "\nERROR: Ada label di test.csv yang tidak ada di label_mapping.json.\n"
        "Silakan cek kembali nama label test.csv agar sesuai dengan nama folder Train.\n"
        "Masalah ini biasanya terjadi karena perbedaan spasi, titik, underscore, kapitalisasi, atau typo."
    )

print("[INFO] Validasi label test.csv selesai. Semua label cocok dengan mapping. ✔")

# --- Build the path list and label-index list for the test dataset ---
test_paths = []
test_labels = []

# Rows whose file is missing or has an unsupported extension are skipped
# with a warning rather than aborting the run.
for fname, label_name in zip(df_test["filename"], df_test["label"]):
    fpath = TEST_DIR / fname

    if not fpath.exists():
        print(f"[WARNING] File tidak ditemukan: {fpath}, skip.")
        continue

    ext = fpath.suffix.lower()
    if ext not in VALID_EXTS:
        print(f"[WARNING] Ekstensi tidak didukung: {fpath}, skip.")
        continue

    label_idx = class2idx[label_name]
    test_paths.append(str(fpath))
    test_labels.append(label_idx)

test_paths  = np.array(test_paths)
test_labels = np.array(test_labels, dtype=np.int64)

print("Total data test terbaca:", len(test_paths))
print("Contoh path:", test_paths[:3])
print("Contoh label idx:", test_labels[:3])


## Helper Baca Gambar & Face Crop

In [None]:
# Short alias for MediaPipe's face-detection solution module.
mp_face_detection = mp.solutions.face_detection

def read_image_bgr(path):
    """Read an image file and return it as a BGR ``numpy`` array.

    HEIC/HEIF files are decoded with PIL; every other file goes through
    ``cv2.imread`` first and falls back to PIL when OpenCV cannot decode it.

    Args:
        path: Location of the image file (``str`` or path-like).

    Returns:
        A C-contiguous ``H x W x 3`` array in BGR channel order.

    Raises:
        Whatever ``PIL.Image.open`` / ``convert`` raise when the PIL path is
        taken and the file cannot be opened or decoded.
    """
    path = str(path)

    def _read_with_pil():
        # Context manager closes the file handle deterministically.
        with Image.open(path) as pil_img:
            rgb = np.array(pil_img.convert("RGB"))
        # Reversing the channel axis yields a negative-stride view; copy it
        # into a contiguous buffer so the result behaves like the array
        # returned by cv2.imread (OpenCV in-place ops reject such views).
        return np.ascontiguousarray(rgb[:, :, ::-1])

    ext = os.path.splitext(path)[1].lower()
    if ext in (".heic", ".heif"):
        return _read_with_pil()

    img = cv2.imread(path)
    if img is None:  # cv2.imread reports failure by returning None
        img = _read_with_pil()
    return img


def crop_face_mediapipe(path, margin=0.2):
    """Return a PIL RGB image cropped around the most confident detected face.

    The image at ``path`` is loaded, passed through MediaPipe face detection
    and cropped to the top-scoring bounding box, enlarged by ``margin``
    (a fraction of the box size) and clipped to the image borders.
    When no face is detected, or the crop turns out empty, the full image
    is returned instead.

    Args:
        path: Location of the image file.
        margin: Relative enlargement applied to the detected box.

    Returns:
        ``PIL.Image.Image`` in RGB order.
    """
    bgr = read_image_bgr(path)
    img_h, img_w, _ = bgr.shape
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    detector_kwargs = {"model_selection": 1, "min_detection_confidence": 0.5}
    with mp_face_detection.FaceDetection(**detector_kwargs) as detector:
        found = detector.process(rgb).detections
        if not found:
            return Image.fromarray(rgb)

        # Highest score wins; on ties the earliest detection is kept.
        best = max(found, key=lambda d: d.score[0] if d.score else 0)
        rel_box = best.location_data.relative_bounding_box

        # Relative coordinates -> pixel coordinates.
        left = int(rel_box.xmin * img_w)
        top = int(rel_box.ymin * img_h)
        face_w = int(rel_box.width * img_w)
        face_h = int(rel_box.height * img_h)

        # Grow the box symmetrically around its centre.
        center_x = left + face_w // 2
        center_y = top + face_h // 2
        reach_x = int(face_w * (1 + margin) / 2)
        reach_y = int(face_h * (1 + margin) / 2)

        # Clip to the image so slicing never wraps around.
        col_lo, col_hi = max(0, center_x - reach_x), min(img_w, center_x + reach_x)
        row_lo, row_hi = max(0, center_y - reach_y), min(img_h, center_y + reach_y)

        face = rgb[row_lo:row_hi, col_lo:col_hi]
        return Image.fromarray(rgb if face.size == 0 else face)


## Transform & Dataset Test

In [None]:
# Evaluation-time preprocessing: resize to IMG_SIZE x IMG_SIZE, convert to a
# tensor, then normalise every channel with mean 0.5 and std 0.5.
# NOTE(review): presumably this has to mirror the training-time transform — confirm.
test_transform = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE)),
    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5]),
])

class FaceTestDataset(Dataset):
    """Dataset yielding ``(image_tensor, label, filename)`` for test images.

    Args:
        paths: Indexable collection of image file paths.
        labels: Indexable collection of labels, aligned with ``paths``.
        transform: Optional callable applied to the PIL image; when ``None``
            the image is only converted with ``T.ToTensor()``.
        use_face_crop: When true, images are cropped to the detected face
            via ``crop_face_mediapipe``; otherwise the full image is used.
    """

    def __init__(self, paths, labels, transform=None, use_face_crop=True):
        self.paths, self.labels = paths, labels
        self.transform, self.use_face_crop = transform, use_face_crop

    def __len__(self):
        return len(self.paths)

    def _load_pil(self, path):
        """Load ``path`` as an RGB PIL image, face-cropped when enabled."""
        if self.use_face_crop:
            return crop_face_mediapipe(path)
        bgr = read_image_bgr(path)
        return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

    def __getitem__(self, idx):
        path, label = self.paths[idx], self.labels[idx]
        image = self._load_pil(path)

        to_tensor = self.transform if self.transform is not None else T.ToTensor()
        # Only the bare file name is returned, not the full path.
        return to_tensor(image), label, os.path.basename(path)


# Test dataset: face-cropped images run through the evaluation transform.
test_dataset = FaceTestDataset(
    test_paths,
    test_labels,
    transform=test_transform,
    use_face_crop=True,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # deterministic, CSV-ordered iteration
    num_workers=NUM_WORKERS,
    # Pinned host memory only helps host->GPU copies; on a CPU-only run it
    # brings no benefit, so enable it only when the model lives on CUDA.
    pin_memory=(device.type == "cuda"),
)

print("Jumlah batch test:", len(test_loader))


## Metrics (Tanpa sklearn)

In [None]:
def compute_metrics(y_true, y_pred, num_classes):
    """Compute a confusion matrix and classification scores without sklearn.

    Args:
        y_true: Iterable of integer ground-truth class indices.
        y_pred: Iterable of integer predicted class indices, aligned with
            ``y_true``.
        num_classes: Number of classes; fixes the confusion-matrix size.

    Returns:
        Dict with ``confusion_matrix`` (rows = true class, columns = predicted
        class), ``accuracy`` and the support-weighted and macro-averaged
        precision, recall and F1. Any ratio with a zero denominator counts
        as 0.0.
    """
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for true_idx, pred_idx in zip(y_true, y_pred):
        cm[true_idx, pred_idx] += 1

    n_samples = np.sum(cm)
    acc = np.trace(cm) / n_samples if n_samples > 0 else 0.0

    hits = np.diag(cm)            # true positives per class
    predicted = cm.sum(axis=0)    # tp + fp (column totals)
    support = cm.sum(axis=1)      # tp + fn (row totals)

    # Element-wise ratios; entries with a zero denominator keep the 0.0
    # they were initialised with.
    precision_per_class = np.divide(
        hits, predicted, out=np.zeros(num_classes), where=predicted > 0
    )
    recall_per_class = np.divide(
        hits, support, out=np.zeros(num_classes), where=support > 0
    )
    pr_sum = precision_per_class + recall_per_class
    f1_per_class = np.divide(
        2 * precision_per_class * recall_per_class,
        pr_sum,
        out=np.zeros(num_classes),
        where=pr_sum > 0,
    )

    # Class weights proportional to support; guard against an empty matrix.
    n_support = support.sum()
    weights = support / (n_support if n_support > 0 else 1)

    return {
        "confusion_matrix": cm,
        "accuracy": acc,
        "precision_weighted": np.sum(precision_per_class * weights),
        "recall_weighted": np.sum(recall_per_class * weights),
        "f1_weighted": np.sum(f1_per_class * weights),
        "precision_macro": precision_per_class.mean(),
        "recall_macro": recall_per_class.mean(),
        "f1_macro": f1_per_class.mean(),
    }


def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Display ``cm`` as a blue seaborn heatmap on a 10x8 inch figure.

    Args:
        cm: 2-D confusion matrix (rows = true class, columns = predicted).
        title: Text shown above the plot.
    """
    _, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, cmap="Blues", ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    plt.show()


## Build DenseNet121 (harus sama dengan training)

In [None]:
import timm

def build_densenet121(num_classes):
    """Create a timm DenseNet-121 classifier with ``num_classes`` outputs.

    No pretrained weights are downloaded: at test time the parameters come
    from the saved ``.pth`` checkpoint.
    """
    model_kwargs = {"pretrained": False, "num_classes": num_classes}
    return timm.create_model("densenet121", **model_kwargs)

# Instantiate the network, then move its parameters to the target device
# (Module.to works in place and returns the same module).
densenet_model = build_densenet121(num_classes)
densenet_model = densenet_model.to(device)
print(
    "DenseNet121 params (M):",
    sum(param.numel() for param in densenet_model.parameters()) / 1e6,
)


## Load Saved Weights DenseNet121

In [None]:
# Fail early with a clear error when the checkpoint is missing; an explicit
# raise (unlike `assert`) is not stripped under `python -O`.
if not DENSE_WEIGHTS_PATH.exists():
    raise FileNotFoundError(f"Tidak menemukan: {DENSE_WEIGHTS_PATH}")

# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered checkpoint cannot execute arbitrary code on load.
# NOTE(review): assumes the file holds a plain state_dict, which is what
# load_state_dict below requires anyway — confirm against the training script.
dense_state = torch.load(DENSE_WEIGHTS_PATH, map_location=device, weights_only=True)
densenet_model.load_state_dict(dense_state)
densenet_model.eval()  # inference mode for dropout / batch-norm layers

print("Loaded DenseNet121 from:", DENSE_WEIGHTS_PATH)


## Fungsi Evaluasi Loader

In [None]:
@torch.no_grad()
def evaluate_on_loader(model, loader, device):
    """Run ``model`` over ``loader`` and collect labels, predictions, filenames.

    Args:
        model: Module mapping an image batch to per-class logits.
        loader: Iterable yielding ``(images, labels, filenames)`` batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Tuple ``(y_true, y_pred, filenames)``: two ``int64`` arrays and a
        list of file names, all aligned sample by sample.
    """
    model.eval()
    all_true = []
    all_pred = []
    all_fname = []

    for imgs, labels, fnames in loader:
        logits = model(imgs.to(device))
        preds = torch.argmax(logits, dim=1)

        # Labels are only recorded, never consumed on `device`, so they are
        # not shipped there; .cpu() is a no-op for tensors already on the CPU.
        all_true.extend(labels.cpu().numpy().tolist())
        all_pred.extend(preds.cpu().numpy().tolist())
        all_fname.extend(list(fnames))

    # Fixed integer dtype so that an empty loader still yields index arrays
    # (np.array([]) would default to float64).
    all_true = np.array(all_true, dtype=np.int64)
    all_pred = np.array(all_pred, dtype=np.int64)
    return all_true, all_pred, all_fname


## Evaluasi DenseNet121 + jawaban.csv

In [None]:
print("=== Evaluasi DenseNet121 di TEST SET ===")

# Run inference over the whole test loader.
y_true_dense, y_pred_dense, fnames_dense = evaluate_on_loader(
    densenet_model, test_loader, device
)

# Confusion matrix plus accuracy and weighted / macro precision, recall, F1.
metrics_dense = compute_metrics(y_true_dense, y_pred_dense, num_classes)

print(f"Accuracy          : {metrics_dense['accuracy']:.4f}")
print(f"Precision (wgt)   : {metrics_dense['precision_weighted']:.4f}")
print(f"Recall    (wgt)   : {metrics_dense['recall_weighted']:.4f}")
print(f"F1-score (wgt)    : {metrics_dense['f1_weighted']:.4f}")
print(f"Precision (macro) : {metrics_dense['precision_macro']:.4f}")
print(f"Recall    (macro) : {metrics_dense['recall_macro']:.4f}")
print(f"F1-score (macro)  : {metrics_dense['f1_macro']:.4f}")

plot_confusion_matrix(metrics_dense["confusion_matrix"],
                      title="Confusion Matrix - DenseNet121 (Test Set)")

# Save the full predictions plus the submission answers file.
# Class indices are mapped back to class names through idx2class.
df_dense = pd.DataFrame({
    "filename": fnames_dense,
    "true_label_idx": y_true_dense,
    "true_label_name": [idx2class[int(i)] for i in y_true_dense],
    "pred_label_idx": y_pred_dense,
    "pred_label_name": [idx2class[int(i)] for i in y_pred_dense],
})

jawaban_dense_full = WORK_DIR / "jawaban_densenet121_full.csv"
jawaban_dense_sub  = WORK_DIR / "jawaban_densenet121_submit.csv"

# Full file: every column. Submission file: only filename + predicted class
# name, with the latter renamed to "label".
df_dense.to_csv(jawaban_dense_full, index=False)
df_dense[["filename", "pred_label_name"]].rename(
    columns={"pred_label_name": "label"}
).to_csv(jawaban_dense_sub, index=False)

print("Saved DenseNet full predictions  :", jawaban_dense_full)
print("Saved DenseNet jawaban.csv (sub) :", jawaban_dense_sub)
