# FaceNet Evaluation - Kaggle

Evaluation notebook for the FaceNet model using embedding-based identification: each evaluation image is matched to the most similar identity prototype.

## Approach:
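- Extract embeddings for the train set and average them into one L2-normalized prototype per identity
- Extract embeddings for the eval set
- Classify each eval image by cosine similarity to the prototypes (most similar prototype wins)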
- Metrics: Top-1/Top-5 accuracy, ROC-AUC, threshold analysis

In [None]:
import os, sys, time, json
import shutil, glob
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from collections import defaultdict

# Names of the Kaggle datasets attached to this notebook (folders under /kaggle/input).
KAGGLE_DATASET_NAME = "celeba-aligned-balanced"
CHECKPOINT_DATASET_NAME = "facenet-checkpoints"

# Working locations: the repository checkout and the local checkpoint folder.
ROOT = "/kaggle/working/FaceRecognition"
CHECKPOINT_DIR = "/kaggle/working/checkpoints/facenet"

# Input folder of the image dataset.
DATA_DIR = "/kaggle/input/" + KAGGLE_DATASET_NAME

# Make sure the local checkpoint folder exists (no error if it is already there);
# a later cell copies .pth files into it.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

In [None]:
# Stage every .pth file from the attached checkpoint dataset into CHECKPOINT_DIR.
checkpoint_input = f"/kaggle/input/{CHECKPOINT_DATASET_NAME}"
if os.path.exists(checkpoint_input):
    pth_pattern = os.path.join(checkpoint_input, "**/*.pth")
    for src_path in glob.glob(pth_pattern, recursive=True):
        target_path = os.path.join(CHECKPOINT_DIR, os.path.basename(src_path))
        if os.path.exists(target_path):
            # A file with this name is already staged; keep it untouched.
            continue
        shutil.copy(src_path, target_path)
    staged_files = os.listdir(CHECKPOINT_DIR)
    print(f"Checkpoints: {staged_files}")

In [None]:
# Clone repo
# Bring the project sources into ROOT: update the checkout if ROOT already
# exists, otherwise clone it. `%cd` and `!` are IPython magics, so this cell
# only runs inside a notebook kernel, not as plain Python.
REPO_URL = "https://github.com/sin0235/FaceRecognition.git"
if os.path.exists(ROOT):
    %cd {ROOT}
    !git pull
else:
    !git clone {REPO_URL} {ROOT}
    %cd {ROOT}
# Put the checkout on sys.path so later cells can import its packages
# (e.g. `models.facenet`).
if ROOT not in sys.path: sys.path.insert(0, ROOT)

In [None]:
# Install the notebook's Python dependencies quietly (-q); `!` runs a shell command from IPython.
!pip install -q opencv-python-headless Pillow scikit-learn tqdm pyyaml matplotlib seaborn

In [None]:
import torch
from PIL import Image
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.manifold import TSNE

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

---
## 1. Load FaceNet Model

In [None]:
from models.facenet.facenet_model import FaceNetModel

# Embedding width passed to the model constructor and echoed in the log line.
# NOTE(review): presumably this has to match the width the checkpoint was
# trained with -- confirm against the training config.
EMBEDDING_SIZE = 128

checkpoint_path = os.path.join(CHECKPOINT_DIR, "facenet_best.pth")
if not os.path.isfile(checkpoint_path):
    # Fail early and show what is actually in the folder, so a missing or
    # misnamed checkpoint upload is obvious from the error message.
    _available = os.listdir(CHECKPOINT_DIR) if os.path.isdir(CHECKPOINT_DIR) else []
    raise FileNotFoundError(
        f"Checkpoint not found: {checkpoint_path} "
        f"(files in {CHECKPOINT_DIR}: {_available})"
    )

# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load checkpoints
# from a source you trust; switch to weights_only=True if the checkpoint holds
# nothing but tensors and plain containers.
checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)

# Build the network without pretrained weights, restore the saved parameters,
# then move it to the selected device and switch to inference mode.
model = FaceNetModel(embedding_size=EMBEDDING_SIZE, pretrained=False)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device).eval()

print(f"Model loaded: embedding_size={EMBEDDING_SIZE}")
# 'epoch' and 'best_val_acc' are optional checkpoint keys; both default to 0.
print(f"Training epochs: {checkpoint.get('epoch', 0) + 1}")
print(f"Best val acc (training): {checkpoint.get('best_val_acc', 0):.2f}%")

---
## 2. Build Reference Database

In [None]:
from models.facenet.facenet_dataloader import get_val_transforms
from torch.utils.data import DataLoader, Dataset

# Locate the split folders: prefer the layout nested under
# "CelebA_Aligned_Balanced"; if its train folder is absent, fall back to the
# splits sitting directly under DATA_DIR.
_nested_root = os.path.join(DATA_DIR, "CelebA_Aligned_Balanced")
if os.path.exists(os.path.join(_nested_root, "train")):
    _split_root = _nested_root
else:
    _split_root = DATA_DIR

train_dir, val_dir, test_dir = (
    os.path.join(_split_root, _split) for _split in ("train", "val", "test")
)

for _title, _path in (("Train", train_dir), ("Val", val_dir), ("Test", test_dir)):
    print(f"{_title} dir: {_path}")

In [None]:
# Simple dataset class
class SimpleDataset(Dataset):
    """Image-folder dataset yielding ``(image, identity_name)`` pairs.

    Expects the layout ``data_root/<identity>/<image file>``. Every
    sub-directory name is used verbatim as the identity label; entries of
    ``data_root`` that are not directories are ignored.

    Args:
        data_root: Directory containing one sub-directory per identity.
        transform: Callable applied to each loaded RGB PIL image. If falsy,
            the PIL image is returned as-is.
        max_per_identity: If truthy, keep at most this many images per
            identity (the first ones in sorted filename order).
    """

    def __init__(self, data_root, transform, max_per_identity=None):
        self.transform = transform
        self.samples = []  # (path, identity_name)

        for identity in sorted(os.listdir(data_root)):
            identity_path = os.path.join(data_root, identity)
            if not os.path.isdir(identity_path):
                continue

            # os.listdir returns entries in arbitrary order; sort them so the
            # subset kept by max_per_identity (and the sample order) is the
            # same on every run and every machine.
            imgs = sorted(
                f for f in os.listdir(identity_path)
                if f.lower().endswith(('.jpg', '.png'))
            )
            if max_per_identity:
                imgs = imgs[:max_per_identity]

            self.samples.extend(
                (os.path.join(identity_path, img), identity) for img in imgs
            )

    def __len__(self):
        """Return the number of (path, identity) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, identity_name)``."""
        path, identity = self.samples[idx]
        # Image.open is lazy and keeps the file open; convert() reads the
        # pixels into a new image, so the handle can be closed right away.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, identity

In [None]:
# Build one prototype per identity from the training split
transform = get_val_transforms(image_size=160)  # FaceNet uses 160x160
train_dataset = SimpleDataset(train_dir, transform, max_per_identity=5)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=False, num_workers=4)

print(f"Train samples: {len(train_dataset)}")

# identity name -> list of embedding vectors collected for that identity
identity_embeddings = defaultdict(list)

print("Extracting train embeddings...")
with torch.no_grad():
    for batch_images, batch_names in tqdm(train_loader):
        batch_embs = model(batch_images.to(device)).cpu().numpy()  # [B, 128]
        for name, vec in zip(batch_names, batch_embs):
            identity_embeddings[name].append(vec)

# Prototype = mean of an identity's embeddings, rescaled to unit L2 norm
prototypes = {}
for name, vecs in identity_embeddings.items():
    centroid = np.stack(vecs).mean(axis=0)
    prototypes[name] = centroid / np.linalg.norm(centroid)

print(f"Built {len(prototypes)} identity prototypes")

In [None]:
# Stack the prototypes row-wise in sorted-name order so a single matrix
# product scores a batch against every identity; row i belongs to identity_list[i].
identity_list = sorted(prototypes)
identity_to_idx = dict(zip(identity_list, range(len(identity_list))))
prototype_matrix = np.array([prototypes[label] for label in identity_list])  # [N, 128]

print(f"Prototype matrix: {prototype_matrix.shape}")

---
## 3. Evaluate with Cosine Similarity

In [None]:
# Evaluate on the test split when its folder exists, otherwise on the validation split
if os.path.exists(test_dir):
    eval_dir = test_dir
else:
    eval_dir = val_dir

eval_dataset = SimpleDataset(eval_dir, transform)
eval_loader = DataLoader(
    eval_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
)

print(f"Eval dir: {eval_dir}")
print(f"Eval samples: {len(eval_dataset)}")

In [None]:
# Evaluate using cosine similarity
# Results are gathered per batch and concatenated once at the end. Samples whose
# identity has no prototype cannot be scored against a true class, so they are
# excluded from the metrics and counted in `skipped_unknown`.
_true_chunks, _pred_chunks, _sim_chunks = [], [], []
skipped_unknown = 0

print("Evaluating with cosine similarity...")
with torch.no_grad():
    for images, identities in tqdm(eval_loader):
        images = images.to(device)
        embeddings = model(images).cpu().numpy()  # [B, 128]

        # L2-normalize the query embeddings. The prototypes are unit-norm, so
        # the dot product below is a true cosine similarity only if the queries
        # are unit-norm too. This is a no-op when the model already normalizes
        # its output; the clamp avoids dividing by zero.
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = embeddings / np.maximum(norms, 1e-12)

        # Cosine similarity with all prototypes
        similarities = np.dot(embeddings, prototype_matrix.T)  # [B, N]

        # Top-1 predictions
        pred_indices = np.argmax(similarities, axis=1)

        # Map identity names to prototype rows; -1 marks an identity without a prototype.
        true_indices = np.array(
            [identity_to_idx.get(name, -1) for name in identities], dtype=np.int64
        )
        known = true_indices >= 0
        skipped_unknown += int((~known).sum())

        _true_chunks.append(true_indices[known])
        _pred_chunks.append(pred_indices[known])
        _sim_chunks.append(similarities[known])

if _true_chunks:
    all_true_labels = np.concatenate(_true_chunks)
    all_pred_labels = np.concatenate(_pred_chunks)
    all_similarities = np.concatenate(_sim_chunks, axis=0)
else:
    # Empty eval loader: keep well-formed (0-row) arrays for the later cells.
    all_true_labels = np.empty(0, dtype=np.int64)
    all_pred_labels = np.empty(0, dtype=np.int64)
    all_similarities = np.empty((0, prototype_matrix.shape[0]), dtype=prototype_matrix.dtype)

if skipped_unknown:
    print(f"Skipped {skipped_unknown} eval samples whose identity has no prototype")

In [None]:
# Compute metrics
# Top-1: share of samples whose most similar prototype is the true identity
top1_hits = all_pred_labels == all_true_labels
top1_acc = top1_hits.mean() * 100

# Top-5: argsort is ascending, so the last five columns hold the five most
# similar prototypes; a sample counts when its true index is among them
top5_preds = np.argsort(all_similarities, axis=1)[:, -5:]
top5_correct = (top5_preds == all_true_labels[:, None]).any(axis=1)
top5_acc = np.mean(top5_correct) * 100

_banner = '=' * 50
print(f"\n{_banner}")
print("FaceNet EVALUATION RESULTS")
print(_banner)
print(f"Total samples: {len(all_true_labels)}")
print(f"Top-1 Accuracy: {top1_acc:.2f}%")
print(f"Top-5 Accuracy: {top5_acc:.2f}%")

In [None]:
# Confusion Matrix (top 20 classes)
# Restrict the plot to the `sample_classes` identities with the most eval samples.
sample_classes = 20
class_counts = np.bincount(all_true_labels, minlength=len(identity_list))
top_classes = np.argsort(class_counts)[-sample_classes:]
mask = np.isin(all_true_labels, top_classes)

labels_sub = all_true_labels[mask]
preds_sub = all_pred_labels[mask]

# Remap the kept class indices to 0..K-1 so the matrix is compact.
label_map = {old: new for new, old in enumerate(sorted(set(labels_sub)))}
labels_re = np.array([label_map.get(l, -1) for l in labels_sub], dtype=np.int64)
preds_re = np.array([label_map.get(p, -1) for p in preds_sub], dtype=np.int64)
# Predictions that fall outside the kept classes map to -1 and are dropped, so
# the matrix only shows confusions *among* the kept classes.
valid = (labels_re >= 0) & (preds_re >= 0)

# Pass labels= explicitly: without it sklearn builds the axes from the values
# that happen to occur, so a kept class with no surviving sample would vanish
# and shift every following row/column.
cm = confusion_matrix(
    labels_re[valid], preds_re[valid], labels=np.arange(len(label_map))
)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, cmap='Blues')
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.title(f'Confusion Matrix (Top {sample_classes} Classes)')
plt.savefig('/kaggle/working/facenet_confusion_matrix.png', dpi=150)
plt.show()

In [None]:
# ROC Curve
# Micro-averaged ROC: every (sample, class) pair is one binary decision, scored
# by the similarity between the sample and that class's prototype.
print("Computing ROC Curve...")

# Subsample to at most 5000 eval samples. A fixed seed keeps the subset, and
# therefore the reported AUC, identical from run to run.
sample_size = min(5000, len(all_true_labels))
_roc_rng = np.random.default_rng(42)
idx = _roc_rng.choice(len(all_true_labels), sample_size, replace=False)
labels_sample = all_true_labels[idx]
sims_sample = all_similarities[idx]

# One-hot targets over the classes present in the subsample. Built by direct
# comparison instead of label_binarize, which returns a single column when only
# two classes are present and would then not line up with sims_for_classes.
classes = np.unique(labels_sample)
y_true_bin = (labels_sample[:, None] == classes[None, :]).astype(int)
sims_for_classes = sims_sample[:, classes]

fpr, tpr, _ = roc_curve(y_true_bin.ravel(), sims_for_classes.ravel())
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC (AUC = {roc_auc:.4f})', color='blue', lw=2)
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('FaceNet ROC Curve')
plt.legend()
plt.savefig('/kaggle/working/facenet_roc_curve.png', dpi=150)
plt.show()

print(f"AUC: {roc_auc:.4f}")

In [None]:
# Threshold Analysis
# For each similarity threshold t, "accept" a prediction only when its best
# similarity is >= t. Coverage is the share of accepted samples; accuracy is
# the share of correct predictions among the accepted ones.
max_sims = np.max(all_similarities, axis=1)
is_correct = (all_pred_labels == all_true_labels).astype(int)

thresholds = np.arange(0.0, 1.0, 0.05)
accs, covs = [], []
for t in thresholds:
    m = max_sims >= t
    covs.append(m.mean() * 100)
    # With nothing accepted, accuracy is undefined: use NaN so the curve shows
    # a gap instead of a misleading drop to 0%.
    accs.append(is_correct[m].mean() * 100 if m.any() else np.nan)

fig, ax1 = plt.subplots(figsize=(10, 5))
acc_line, = ax1.plot(thresholds, accs, 'b-', lw=2, label='Accuracy')
ax1.set_xlabel('Similarity threshold')
ax1.set_ylabel('Accuracy (%)', color='blue')
ax2 = ax1.twinx()  # second y-axis sharing the threshold axis
cov_line, = ax2.plot(thresholds, covs, 'r--', lw=2, label='Coverage')
ax2.set_ylabel('Coverage (%)', color='red')
# One legend for both axes, so the curve labels are actually displayed.
ax1.legend(handles=[acc_line, cov_line], loc='lower left')
plt.title('FaceNet: Accuracy vs Coverage')
plt.savefig('/kaggle/working/facenet_threshold_analysis.png', dpi=150)
plt.show()

---
## 4. Final Report

In [None]:
# Collect the headline numbers, write them to a JSON file, then echo them.
report_path = '/kaggle/working/facenet_evaluation_report.json'

# Cast to built-in float/int so json.dump receives plain Python numbers.
metrics_summary = {
    'top1_accuracy': float(top1_acc),
    'top5_accuracy': float(top5_acc),
    'auc': float(roc_auc),
}
report = {
    'timestamp': datetime.now().isoformat(),
    'model': 'FaceNet',
    'method': 'embedding-based (cosine similarity)',
    'embedding_size': 128,
    'metrics': metrics_summary,
    'eval_samples': int(len(all_true_labels)),
    'num_identities': int(len(identity_list)),
}

with open(report_path, 'w') as report_file:
    json.dump(report, report_file, indent=2)

_rule = "=" * 50
print("\n" + _rule)
print("FACENET FINAL REPORT")
print(_rule)
print(f"Top-1 Accuracy: {top1_acc:.2f}%")
print(f"Top-5 Accuracy: {top5_acc:.2f}%")
print(f"AUC: {roc_auc:.4f}")
print("\nReport saved to: facenet_evaluation_report.json")