# ArcFace Evaluation - Kaggle (Fixed)

**FIX**: Dung embedding-based classification thay vi logits-based

## Van de truoc do:
- ArcFace model can labels de tinh logits (ArcMarginProduct)
- Label mapping khac nhau giua train va eval datasets
- Ket qua: accuracy gan 0%

## Fix:
- Extract embeddings cho train set (prototypes)
- Extract embeddings cho eval set
- Classification bang cosine similarity

In [1]:
import os, sys, time, json
import shutil, glob
from datetime import datetime
# numpy và matplotlib sẽ được import sau khi fix compatibility
from tqdm import tqdm
from collections import defaultdict

ROOT = "/kaggle/working/FaceRecognition"
CHECKPOINT_DIR = "/kaggle/working/checkpoints/arcface"
KAGGLE_DATASET_NAME = "celeba-aligned-balanced"
DATA_DIR = f"/kaggle/input/{KAGGLE_DATASET_NAME}"
CHECKPOINT_DATASET_NAME = "arcface-checkpoints"

os.makedirs(CHECKPOINT_DIR, exist_ok=True)

In [2]:
# Copy checkpoint
checkpoint_input = f"/kaggle/input/{CHECKPOINT_DATASET_NAME}"
if os.path.exists(checkpoint_input):
    for pth in glob.glob(os.path.join(checkpoint_input, "**/*.pth"), recursive=True):
        dest = os.path.join(CHECKPOINT_DIR, os.path.basename(pth))
        if not os.path.exists(dest): shutil.copy(pth, dest)
    print(f"Checkpoints: {os.listdir(CHECKPOINT_DIR)}")

Checkpoints: ['arcface_best.pth']


In [None]:
# Cài đặt dependencies với phiên bản tương thích
# CHẠY CELL NÀY TRƯỚC KHI IMPORT NUMPY/SCIPY/SKLEARN
# Nếu gặp lỗi AttributeError với numpy, restart kernel và chạy lại cell này
!pip uninstall -y numpy scipy scikit-learn 2>/dev/null || true
!pip cache purge 2>/dev/null || true
!pip install -q --no-cache-dir "numpy>=1.24,<2.0"
!pip install -q --no-cache-dir "scipy>=1.10,<2.0"
!pip install -q --no-cache-dir "scikit-learn>=1.3,<2.0"
!pip install -q "matplotlib>=3.7" "seaborn>=0.12"
print("[OK] Dependencies installed successfully")

In [None]:
# Cau hinh GitHub token
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    GITHUB_TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
    print("[OK] Da lay GITHUB_TOKEN")
except Exception as e:
    GITHUB_TOKEN = None
    print("[INFO] Su dung public URL")

if GITHUB_TOKEN:
    REPO_URL = f"https://{GITHUB_TOKEN}@github.com/sin0235/FaceRecognition.git"
else:
    REPO_URL = "https://github.com/sin0235/FaceRecognition.git"

# Clone repository
if os.path.exists(ROOT):
    print("Repository da ton tai, dang pull updates...")
    %cd {ROOT}
    if GITHUB_TOKEN:
        !git remote set-url origin {REPO_URL}
    !git pull --no-rebase origin fix/lbph-module
else:
    print(f"Dang clone repository...")
    !git clone {REPO_URL} {ROOT}
    %cd {ROOT}

print(f"\nWorking directory: {os.getcwd()}")
!ls -la

# Thêm ROOT vào sys.path để import modules
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)
    print(f"\n[OK] Added {ROOT} to sys.path")

[OK] Da lay GITHUB_TOKEN
Repository da ton tai, dang pull updates...
/kaggle/working/FaceRecognition
From https://github.com/sin0235/FaceRecognition
 * branch            fix/lbph-module -> FETCH_HEAD
Committer identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@18c6c2083f71.(none)')

Working directory: /kaggle/working/FaceRecognition
total 140
drwxr-xr-x 15 root root  4096 Dec 19 09:03 .
drwxr-xr-x  5 root root  4096 Dec 19 09:03 ..
drwxr-xr-x  2 root root  4096 Dec 19 09:03 app
drwxr-xr-x  2 root root  4096 Dec 19 09:03 configs
drwxr-xr-x  8 root root  4096 Dec 19 09:08 .git
drwxr-xr-x  3 root root  4096 Dec 19 09:03 .github
-rw-r--r--  1 root root  1246 Dec 19 09:03 .gitignore
drwxr-xr-x  2 root root  4096 Dec 19 09:03 inference
-rw-r

In [5]:
# Suppress TensorBoard warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

# Import các thư viện cần thiết
import sys

# Import numpy, matplotlib, seaborn, sklearn
# LƯU Ý: Nếu gặp lỗi AttributeError với numpy (numpy.ufunc object has no attribute '__module__'),
# cần RESTART KERNEL và chạy lại cell cài đặt dependencies trước
try:
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import confusion_matrix, roc_curve, auc
    from sklearn.preprocessing import label_binarize
    from sklearn.manifold import TSNE
    print("[OK] All imports successful")
    print(f"NumPy version: {np.__version__}")
except (ImportError, AttributeError, TypeError, ValueError) as e:
    print(f"[ERROR] Import failed: {type(e).__name__}: {e}")
    print("\n[GIẢI PHÁP]")
    print("1. Restart kernel: Kernel -> Restart Kernel")
    print("2. Chạy lại các cell từ đầu, đặc biệt là cell cài đặt dependencies")
    print("3. Nếu vẫn lỗi, chạy lệnh sau trong cell mới:")
    print('   !pip uninstall -y numpy scipy scikit-learn && pip install "numpy>=1.24,<2.0" "scipy>=1.10,<2.0" "scikit-learn>=1.3,<2.0"')
    print("   Sau đó restart kernel và chạy lại")
    raise

import torch
from PIL import Image

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device: {device}")

[OK] All imports successful
Device: cuda


---
## 1. Load Model

In [6]:
from models.arcface.arcface_model import ArcFaceModel

checkpoint_path = os.path.join(CHECKPOINT_DIR, "arcface_best.pth")
checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)
num_classes = checkpoint.get('num_classes', 9343)

model = ArcFaceModel(num_classes=num_classes, pretrained=False)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device).eval()

print(f"Model: {num_classes} classes")
print(f"Training epochs: {checkpoint.get('epoch', 0) + 1}")
print(f"Best val acc (training): {checkpoint.get('best_val_acc', 0):.2f}%")

E0000 00:00:1766135325.266615     193 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766135325.314681     193 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766135325.703651     193 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766135325.703690     193 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766135325.703692     193 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766135325.703695     193 computation_placer.cc:177] computation placer already registered. Please check linka

[INFO] TensorBoard available
Project root: /kaggle/working/FaceRecognition
Model: 9343 classes
Training epochs: 105
Best val acc (training): 81.53%


---
## 2. Build Reference Database (Train Set Prototypes)

In [7]:
from models.arcface.arcface_dataloader import get_val_transforms
from torch.utils.data import DataLoader, Dataset

# Tim data dirs
train_dir = os.path.join(DATA_DIR, "CelebA_Aligned_Balanced", "train")
val_dir = os.path.join(DATA_DIR, "CelebA_Aligned_Balanced", "val")
test_dir = os.path.join(DATA_DIR, "CelebA_Aligned_Balanced", "test")

if not os.path.exists(train_dir):
    train_dir = os.path.join(DATA_DIR, "train")
    val_dir = os.path.join(DATA_DIR, "val")
    test_dir = os.path.join(DATA_DIR, "test")

print(f"Train dir: {train_dir}")
print(f"Val dir: {val_dir}")
print(f"Test dir: {test_dir}")

Train dir: /kaggle/input/celeba-aligned-balanced/CelebA_Aligned_Balanced/train
Val dir: /kaggle/input/celeba-aligned-balanced/CelebA_Aligned_Balanced/val
Test dir: /kaggle/input/celeba-aligned-balanced/CelebA_Aligned_Balanced/test


In [8]:
# Dataset class don gian
class SimpleDataset(Dataset):
    def __init__(self, data_root, transform, max_per_identity=None):
        self.transform = transform
        self.samples = []  # (path, identity_name)
        
        for identity in sorted(os.listdir(data_root)):
            identity_path = os.path.join(data_root, identity)
            if not os.path.isdir(identity_path): continue
            
            imgs = [f for f in os.listdir(identity_path) if f.lower().endswith(('.jpg', '.png'))]
            if max_per_identity:
                imgs = imgs[:max_per_identity]
            
            for img in imgs:
                self.samples.append((os.path.join(identity_path, img), identity))
    
    def __len__(self): return len(self.samples)
    
    def __getitem__(self, idx):
        path, identity = self.samples[idx]
        img = Image.open(path).convert('RGB')
        if self.transform: img = self.transform(img)
        return img, identity

In [9]:
# Extract embeddings cho train set (prototypes)
transform = get_val_transforms(image_size=112)
train_dataset = SimpleDataset(train_dir, transform, max_per_identity=5)  # Max 5 anh/identity de nhanh
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=False, num_workers=4)

print(f"Train samples: {len(train_dataset)}")

# Extract embeddings
identity_embeddings = defaultdict(list)

print("Extracting train embeddings...")
with torch.no_grad():
    for images, identities in tqdm(train_loader):
        images = images.to(device)
        embeddings = model.extract_features(images)  # [B, 512]
        embeddings = embeddings.cpu().numpy()
        
        for emb, identity in zip(embeddings, identities):
            identity_embeddings[identity].append(emb)

# Tinh prototype (mean embedding) cho moi identity
prototypes = {}
for identity, embs in identity_embeddings.items():
    mean_emb = np.mean(embs, axis=0)
    mean_emb = mean_emb / np.linalg.norm(mean_emb)  # L2 normalize
    prototypes[identity] = mean_emb

print(f"Built {len(prototypes)} identity prototypes")

Train samples: 46715
Extracting train embeddings...


100%|██████████| 365/365 [01:09<00:00,  5.25it/s]


Built 9343 identity prototypes


In [10]:
# Chuyen prototypes thanh matrix de tinh nhanh
identity_list = sorted(prototypes.keys())
identity_to_idx = {name: i for i, name in enumerate(identity_list)}
prototype_matrix = np.array([prototypes[name] for name in identity_list])  # [N, 512]

print(f"Prototype matrix: {prototype_matrix.shape}")

Prototype matrix: (9343, 512)


---
## 3. Evaluate with Embedding-based Classification

In [11]:
# Load eval dataset (val hoac test)
eval_dir = test_dir if os.path.exists(test_dir) else val_dir
eval_dataset = SimpleDataset(eval_dir, transform)
eval_loader = DataLoader(eval_dataset, batch_size=128, shuffle=False, num_workers=4)

print(f"Eval dir: {eval_dir}")
print(f"Eval samples: {len(eval_dataset)}")

Eval dir: /kaggle/input/celeba-aligned-balanced/CelebA_Aligned_Balanced/test
Eval samples: 20387


In [12]:
# Evaluate bang cosine similarity
all_true_labels = []  # index trong identity_list
all_pred_labels = []
all_similarities = []

print("Evaluating with cosine similarity...")
with torch.no_grad():
    for images, identities in tqdm(eval_loader):
        images = images.to(device)
        embeddings = model.extract_features(images).cpu().numpy()  # [B, 512]
        
        # Cosine similarity voi tat ca prototypes
        # embeddings: [B, 512], prototype_matrix: [N, 512]
        similarities = np.dot(embeddings, prototype_matrix.T)  # [B, N]
        
        # Top-1 predictions
        pred_indices = np.argmax(similarities, axis=1)
        
        for identity, pred_idx, sim_row in zip(identities, pred_indices, similarities):
            true_idx = identity_to_idx.get(identity, -1)
            if true_idx >= 0:
                all_true_labels.append(true_idx)
                all_pred_labels.append(pred_idx)
                # Đảm bảo sim_row là 1D array trước khi append
                sim_row_flat = np.array(sim_row).flatten()
                all_similarities.append(sim_row_flat)

# Kiểm tra số lượng samples TRƯỚC KHI convert
if len(all_true_labels) == 0:
    print("\n[ERROR] Không có samples nào có identity trong train set!")
    print("Nguyên nhân: Tất cả identities trong eval set không có trong train set.")
    print(f"Train set có {len(identity_list)} identities")
    if len(eval_dataset) > 0:
        sample_identities = list(set([i for _, i in eval_dataset.samples[:50]]))
        print(f"Sample eval identities (first 50 unique): {sample_identities[:10]}...")
        overlap = len(set(sample_identities) & set(identity_list))
        print(f"Overlap với train set: {overlap} identities")
    raise ValueError("Không có samples hợp lệ để evaluate")

print(f"\nEvaluation summary:")
print(f"  Total eval samples: {len(eval_dataset)}")
print(f"  Valid samples (có trong train set): {len(all_true_labels)}")

all_true_labels = np.array(all_true_labels)
all_pred_labels = np.array(all_pred_labels)

# Convert all_similarities thành 2D array [N_samples, N_identities]
if len(all_similarities) == 0:
    raise ValueError("Không có similarities nào được tính!")

all_similarities = np.array(all_similarities)

# Đảm bảo all_similarities là 2D array
if all_similarities.ndim == 1:
    # Nếu chỉ có 1 sample, reshape thành [1, N_identities]
    all_similarities = all_similarities.reshape(1, -1)
elif all_similarities.ndim == 0:
    raise ValueError("all_similarities is scalar, không hợp lệ")

print(f"Similarities shape: {all_similarities.shape}")
print(f"Expected: [N_samples={len(all_true_labels)}, N_identities={len(identity_list)}]")

Evaluating with cosine similarity...


100%|██████████| 160/160 [00:36<00:00,  4.42it/s]



Evaluation summary:
  Total eval samples: 20387
  Valid samples (có trong train set): 20387
Similarities shape: (20387, 9343)
Expected: [N_samples=20387, N_identities=9343]


In [13]:
# Tinh metrics
if len(all_true_labels) == 0:
    raise ValueError("Không có samples nào để evaluate!")

# Đảm bảo all_similarities là 2D
if all_similarities.ndim == 1:
    all_similarities = all_similarities.reshape(1, -1)

# Top-1 Accuracy
if len(all_pred_labels) > 0 and len(all_true_labels) > 0:
    top1_acc = (all_pred_labels == all_true_labels).mean() * 100
else:
    top1_acc = 0.0

# Top-5 Accuracy
if all_similarities.shape[1] >= 5:
    top5_preds = np.argsort(all_similarities, axis=1)[:, -5:]
    top5_correct = [t in p for t, p in zip(all_true_labels, top5_preds)]
    top5_acc = np.mean(top5_correct) * 100
else:
    top5_acc = top1_acc
    print(f"[INFO] Chỉ có {all_similarities.shape[1]} identities, Top-5 = Top-1")

print(f"\n{'='*50}")
print(f"EVALUATION RESULTS (Embedding-based)")
print(f"{'='*50}")
print(f"Total samples: {len(all_true_labels)}")
print(f"Top-1 Accuracy: {top1_acc:.2f}%")
print(f"Top-5 Accuracy: {top5_acc:.2f}%")


EVALUATION RESULTS (Embedding-based)
Total samples: 20387
Top-1 Accuracy: 88.14%
Top-5 Accuracy: 94.11%


In [14]:
# Confusion Matrix (top 20 classes)
sample_classes = 20
class_counts = np.bincount(all_true_labels, minlength=len(identity_list))
top_classes = np.argsort(class_counts)[-sample_classes:]
mask = np.isin(all_true_labels, top_classes)

labels_sub = all_true_labels[mask]
preds_sub = all_pred_labels[mask]

# Remap
label_map = {old: new for new, old in enumerate(sorted(set(labels_sub)))}
labels_re = np.array([label_map.get(l, -1) for l in labels_sub])
preds_re = np.array([label_map.get(p, -1) for p in preds_sub])
valid = (labels_re >= 0) & (preds_re >= 0)

cm = confusion_matrix(labels_re[valid], preds_re[valid])
plt.figure(figsize=(10, 8))
sns.heatmap(cm, cmap='Blues')
plt.title(f'Confusion Matrix (Top {sample_classes} Classes)')
plt.savefig('/kaggle/working/confusion_matrix.png', dpi=150)
plt.show()

In [15]:
# ROC Curve - Binary classification: correct vs incorrect prediction
print("Computing ROC Curve...")

# Sample
sample_size = min(5000, len(all_true_labels))
idx = np.random.choice(len(all_true_labels), sample_size, replace=False)
labels_sample = all_true_labels[idx]
sims_sample = all_similarities[idx]

preds_sample = all_pred_labels[idx]

# Binary labels: 1 = correct prediction, 0 = incorrect
y_true_binary = (labels_sample == preds_sample).astype(int)

# Scores: similarity to true class
y_scores = []
for i, true_idx in enumerate(labels_sample):
    y_scores.append(sims_sample[i, true_idx])

y_scores = np.array(y_scores)

fpr, tpr, thresholds = roc_curve(y_true_binary, y_scores)
roc_auc = auc(fpr, tpr)

# EER (Equal Error Rate)
eer_idx = np.argmin(np.abs(fpr - (1 - tpr)))
eer = fpr[eer_idx]
eer_threshold = thresholds[eer_idx]

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC (AUC = {roc_auc:.4f})', color='blue', lw=2)
plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.scatter([eer], [1-eer], color='red', s=100, zorder=5, 
           label=f'EER = {eer:.4f} (thresh={eer_threshold:.3f})')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ArcFace ROC Curve')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('/kaggle/working/roc_curve.png', dpi=150)
plt.show()

print(f"AUC: {roc_auc:.4f}")
print(f"EER: {eer:.4f} (threshold: {eer_threshold:.3f})")

Computing ROC Curve...
AUC: 0.9595
EER: 0.1013 (threshold: 0.563)


In [16]:
# Threshold Analysis
max_sims = np.max(all_similarities, axis=1)
is_correct = (all_pred_labels == all_true_labels).astype(int)

thresholds = np.arange(0.0, 1.0, 0.05)
accs, covs = [], []
for t in thresholds:
    m = max_sims >= t
    covs.append(m.mean() * 100)
    accs.append(is_correct[m].mean() * 100 if m.sum() > 0 else 0)

fig, ax1 = plt.subplots(figsize=(10, 5))
ax1.plot(thresholds, accs, 'b-', lw=2, label='Accuracy')
ax1.set_ylabel('Accuracy (%)', color='blue')
ax2 = ax1.twinx()
ax2.plot(thresholds, covs, 'r--', lw=2, label='Coverage')
ax2.set_ylabel('Coverage (%)', color='red')
plt.title('Accuracy vs Coverage at Thresholds')
plt.savefig('/kaggle/working/threshold_analysis.png', dpi=150)
plt.show()

---
## 4. Performance Tests

In [17]:
# Latency Test
dummy = torch.randn(1, 3, 112, 112).to(device)
for _ in range(10): model.extract_features(dummy)

latencies = []
for _ in range(100):
    if device == 'cuda': torch.cuda.synchronize()
    start = time.time()
    model.extract_features(torch.randn(1, 3, 112, 112).to(device))
    if device == 'cuda': torch.cuda.synchronize()
    latencies.append((time.time() - start) * 1000)

print(f"Latency: {np.mean(latencies):.2f} ms (avg)")

Latency: 7.02 ms (avg)


In [18]:
# Throughput Test
batch_sizes = [1, 16, 32, 64, 128]
throughputs = []
for bs in batch_sizes:
    dummy = torch.randn(bs, 3, 112, 112).to(device)
    for _ in range(5): model.extract_features(dummy)
    if device == 'cuda': torch.cuda.synchronize()
    start = time.time()
    for _ in range(20): model.extract_features(dummy)
    if device == 'cuda': torch.cuda.synchronize()
    throughputs.append((bs * 20) / (time.time() - start))

plt.bar([str(b) for b in batch_sizes], throughputs)
plt.ylabel('Throughput (img/s)')
plt.savefig('/kaggle/working/throughput.png', dpi=150)
plt.show()

---
## 5. t-SNE Visualization

In [19]:
# Extract eval embeddings for t-SNE
emb_list, lbl_list = [], []
with torch.no_grad():
    for img, identity in tqdm(eval_loader):
        if len(emb_list) * 128 >= 2000: break
        emb_list.append(model.extract_features(img.to(device)).cpu().numpy())
        lbl_list.extend([identity_to_idx.get(i, -1) for i in identity])

embs = np.concatenate(emb_list)[:2000]
lbls = np.array(lbl_list)[:2000]
valid = lbls >= 0
embs, lbls = embs[valid], lbls[valid]

# Top 50 classes
uniq = np.unique(lbls)[:50]
mask = np.isin(lbls, uniq)
embs, lbls = embs[mask], lbls[mask]

print(f"t-SNE samples: {len(embs)}")
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
embs_2d = tsne.fit_transform(embs)

plt.figure(figsize=(12, 8))
plt.scatter(embs_2d[:, 0], embs_2d[:, 1], c=lbls, cmap='tab20', s=10, alpha=0.6)
plt.title('t-SNE Embedding')
plt.savefig('/kaggle/working/tsne.png', dpi=150)
plt.show()

 10%|█         | 16/160 [00:02<00:18,  7.99it/s]
Exception ignored on calling ctypes callback function: <function ThreadpoolController._find_libraries_with_dl_iterate_phdr.<locals>.match_library_callback at 0x7d7b0c4ebec0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/threadpoolctl.py", line 1005, in match_library_callback
    self._make_controller_from_path(filepath)
  File "/usr/local/lib/python3.12/dist-packages/threadpoolctl.py", line 1187, in _make_controller_from_path
    lib_controller = controller_class(
                     ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/threadpoolctl.py", line 114, in __init__
    self.dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/ctypes/__init__.py", line 379, in __init__
    self._handle = _dlopen(self._name, mode)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^
OSError: /usr/local/lib/python3.12/dis

t-SNE samples: 208


---
## 6. Final Report

In [20]:
report = {
    'timestamp': datetime.now().isoformat(),
    'method': 'embedding-based (cosine similarity)',
    'model': {
        'num_classes': int(num_classes),
        'epochs': int(checkpoint.get('epoch', 0) + 1),
        'training_val_acc': float(checkpoint.get('best_val_acc', 0))
    },
    'metrics': {
        'top1_accuracy': float(top1_acc),
        'top5_accuracy': float(top5_acc),
        'auc': float(roc_auc),
        'eer': float(eer) if 'eer' in globals() else None,
        'eer_threshold': float(eer_threshold) if 'eer_threshold' in globals() else None
    },
    'performance': {
        'avg_latency_ms': float(np.mean(latencies)),
        'max_throughput': float(max(throughputs))
    }
}

with open('/kaggle/working/evaluation_report.json', 'w') as f:
    json.dump(report, f, indent=2)

print("\n" + "="*50)
print("FINAL EVALUATION REPORT")
print("="*50)
print(f"Method: Embedding-based (cosine similarity)")
print(f"Top-1 Accuracy: {top1_acc:.2f}%")
print(f"Top-5 Accuracy: {top5_acc:.2f}%")
print(f"AUC: {roc_auc:.4f}")
if 'eer' in globals():
    print(f"EER: {eer:.4f} (threshold: {eer_threshold:.3f})")
print(f"Avg Latency: {np.mean(latencies):.2f} ms")


FINAL EVALUATION REPORT
Method: Embedding-based (cosine similarity)
Top-1 Accuracy: 88.14%
Top-5 Accuracy: 94.11%
AUC: 0.9595
EER: 0.1013 (threshold: 0.563)
Avg Latency: 7.02 ms


In [21]:
!ls -la /kaggle/working/*.png /kaggle/working/*.json

-rw-r--r-- 1 root root 43030 Dec 19 09:13 /kaggle/working/confusion_matrix.png
-rw-r--r-- 1 root root   511 Dec 19 09:13 /kaggle/working/evaluation_report.json
-rw-r--r-- 1 root root 62806 Dec 19 09:13 /kaggle/working/roc_curve.png
-rw-r--r-- 1 root root 57329 Dec 19 09:13 /kaggle/working/threshold_analysis.png
-rw-r--r-- 1 root root 45882 Dec 19 09:13 /kaggle/working/throughput.png
-rw-r--r-- 1 root root 80808 Dec 19 09:13 /kaggle/working/tsne.png


In [None]:
import os
import shutil
import zipfile
import numpy as np
import pandas as pd

base_dir = "/kaggle/working"
artifacts_dir = os.path.join(base_dir, "arcface_eval_artifacts")
os.makedirs(artifacts_dir, exist_ok=True)

# Lưu các dữ liệu nhẹ phục vụ trực quan báo cáo
# 1. ROC curve (fpr, tpr)
roc_df = pd.DataFrame({
    "fpr": np.array(fpr, dtype=float),
    "tpr": np.array(tpr, dtype=float),
})
roc_path = os.path.join(artifacts_dir, "roc_curve_data.csv")
roc_df.to_csv(roc_path, index=False)

# 2. Threshold analysis (threshold, accuracy, coverage)
thr_df = pd.DataFrame({
    "threshold": np.array(thresholds, dtype=float),
    "accuracy": np.array(accs, dtype=float),
    "coverage": np.array(covs, dtype=float),
})
thr_path = os.path.join(artifacts_dir, "threshold_analysis_data.csv")
thr_df.to_csv(thr_path, index=False)

# 3. t-SNE 2D embeddings (giới hạn ~2000 điểm như trên)
if "embs_2d" in globals() and "lbls" in globals():
    tsne_df = pd.DataFrame({
        "x": embs_2d[:, 0].astype(float),
        "y": embs_2d[:, 1].astype(float),
        "label_idx": lbls.astype(int),
    })
    tsne_path = os.path.join(artifacts_dir, "tsne_points.csv")
    tsne_df.to_csv(tsne_path, index=False)

# 4. Confusion matrix giá trị số + tên lớp
# Sử dụng lại biến từ cell vẽ confusion matrix, tránh lệch kích thước với mask của t-SNE
cm_path = os.path.join(artifacts_dir, "confusion_matrix_values.csv")
if "cm" in globals():
    try:
        # labels_sub và valid được tạo ở cell confusion matrix
        cm_original_indices = sorted(set(labels_sub[valid]))
        cm_class_names = [identity_list[i] for i in cm_original_indices]
        cm_df = pd.DataFrame(cm, index=cm_class_names, columns=cm_class_names)
        cm_df.to_csv(cm_path)
    except Exception as e:
        # Fallback: nếu thiếu biến, lưu ma trận với index mặc định
        cm_df = pd.DataFrame(cm)
        cm_df.to_csv(cm_path)
else:
    # Nếu chưa có cm (chưa chạy cell confusion matrix) thì bỏ qua phần này
    print("[WARN] Chưa có confusion matrix trong scope, bỏ qua lưu confusion_matrix_values.csv")

# 5. Copy các file báo cáo đã có (CSV mẫu, ảnh, JSON)
existing_files = [
    "eval_results_sample.csv",
    "confusion_matrix.png",
    "roc_curve.png",
    "threshold_analysis.png",
    "throughput.png",
    "tsne.png",
    "evaluation_report.json",
]

for fname in existing_files:
    src = os.path.join(base_dir, fname)
    if os.path.exists(src):
        shutil.copy(src, os.path.join(artifacts_dir, fname))

# 6. Đóng gói zip để tải nhanh
zip_path = os.path.join(base_dir, "arcface_eval_artifacts.zip")
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    for root, _, files in os.walk(artifacts_dir):
        for f in files:
            full_path = os.path.join(root, f)
            rel_path = os.path.relpath(full_path, artifacts_dir)
            zf.write(full_path, arcname=rel_path)

print(f"Đã lưu artifacts vào thư mục: {artifacts_dir}")
print(f"Đã tạo file zip: {zip_path}")

!ls -lh /kaggle/working/arcface_eval_artifacts.zip

Đã lưu artifacts vào thư mục: /kaggle/working/arcface_eval_artifacts
Đã tạo file zip: /kaggle/working/arcface_eval_artifacts.zip
-rw-r--r-- 1 root root 248K Dec 19 09:25 /kaggle/working/arcface_eval_artifacts.zip
