# Face Recognition Threshold Calibration

This notebook helps calibrate the similarity threshold for face recognition using ROC and DET curves.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix
import seaborn as sns
from typing import List, Tuple

# Configure plotting: apply a matplotlib style sheet, then set seaborn's 'husl' palette.
# NOTE(review): whether the 'seaborn-v0_8-darkgrid' style name is available depends on
# the installed matplotlib version — confirm against the target environment.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

## 1. Load Validation Dataset

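Prepare similarity scores for genuine (same-person) and impostor (different-person) pairs. The cell below simulates them; in practice, compute them from your validation set.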

In [None]:
# Example: Load or generate similarity scores
# In practice, compute these from your validation set

# Use a seeded, local generator so that Restart Kernel -> Run All reproduces the
# same scores, and therefore the same curves and exported threshold metrics.
SEED = 42
N_GENUINE = 1000
N_IMPOSTOR = 5000
rng = np.random.default_rng(SEED)

# Simulate genuine pairs (same person): Beta(8, 2) is skewed towards 1
genuine_scores = rng.beta(8, 2, N_GENUINE)  # Higher scores
genuine_labels = np.ones(len(genuine_scores))

# Simulate impostor pairs (different persons): Beta(2, 8) is skewed towards 0
impostor_scores = rng.beta(2, 8, N_IMPOSTOR)  # Lower scores
impostor_labels = np.zeros(len(impostor_scores))

# Combine into one score vector with matching labels (1 = genuine, 0 = impostor)
all_scores = np.concatenate([genuine_scores, impostor_scores])
all_labels = np.concatenate([genuine_labels, impostor_labels])

print(f"Genuine pairs: {len(genuine_scores)}")
print(f"Impostor pairs: {len(impostor_scores)}")
print(f"Genuine score range: [{genuine_scores.min():.3f}, {genuine_scores.max():.3f}]")
print(f"Impostor score range: [{impostor_scores.min():.3f}, {impostor_scores.max():.3f}]")

## 2. Score Distribution Analysis

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Histogram: density-normalised so the two differently sized groups are comparable
axes[0].hist(genuine_scores, bins=50, alpha=0.7, label='Genuine', density=True, color='green')
axes[0].hist(impostor_scores, bins=50, alpha=0.7, label='Impostor', density=True, color='red')
axes[0].set_xlabel('Similarity Score')
axes[0].set_ylabel('Density')
axes[0].set_title('Score Distribution')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Box plot
data_to_plot = [genuine_scores, impostor_scores]
bp = axes[1].boxplot(data_to_plot)
# Label the boxes through the axis instead of boxplot's `labels=` keyword, which
# newer matplotlib releases deprecate in favour of `tick_labels=`; this form
# works regardless of the installed version.
axes[1].set_xticklabels(['Genuine', 'Impostor'])
axes[1].set_ylabel('Similarity Score')
axes[1].set_title('Score Distribution (Box Plot)')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Statistics
print(f"Genuine - Mean: {genuine_scores.mean():.3f}, Std: {genuine_scores.std():.3f}")
print(f"Impostor - Mean: {impostor_scores.mean():.3f}, Std: {impostor_scores.std():.3f}")

# Quantify the overlap as fractions. Formatting the boolean
# `genuine.min() < impostor.max()` as a percentage could only ever print
# 0% or 100% and said nothing about how much the distributions overlap.
genuine_in_overlap = np.mean(genuine_scores <= impostor_scores.max())
impostor_in_overlap = np.mean(impostor_scores >= genuine_scores.min())
print(
    f"Score overlap: {genuine_in_overlap:.0%} of genuine scores <= max impostor score, "
    f"{impostor_in_overlap:.0%} of impostor scores >= min genuine score"
)

## 3. ROC Curve Analysis

In [None]:
# Compute ROC curve. roc_curve returns non-decreasing fpr/tpr and decreasing
# thresholds; its first point is (0, 0) with a threshold above every score.
fpr, tpr, thresholds = roc_curve(all_labels, all_scores)
roc_auc = auc(fpr, tpr)

# Equal Error Rate: the ROC point where FAR and FRR (= 1 - TAR) are closest.
eer_threshold_idx = np.argmin(np.abs(fpr - (1 - tpr)))
eer_threshold = thresholds[eer_threshold_idx]
# On a finite sample FAR and FRR rarely coincide exactly, so report their
# midpoint at the crossing instead of the FAR alone.
eer = (fpr[eer_threshold_idx] + (1 - tpr[eer_threshold_idx])) / 2

# FAR targets
far_targets = [0.001, 0.01, 0.1]  # 0.1%, 1%, 10%
far_thresholds = []

for target_far in far_targets:
    # Pick the LAST ROC point whose FAR does not exceed the target:
    #  - a nearest-point search may select a FAR above the security target;
    #  - among points sharing the same FAR, the last one has the highest TAR.
    # If even the strictest finite threshold exceeds the target, this falls back
    # to index 0 (FAR = 0, TAR = 0, threshold above every score).
    idx = max(int(np.searchsorted(fpr, target_far, side='right')) - 1, 0)
    far_thresholds.append({
        'FAR': fpr[idx],
        'TAR': tpr[idx],
        'Threshold': thresholds[idx]
    })

# Plot ROC curve
plt.figure(figsize=(10, 8))
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', label='Random')

# Mark special points
plt.scatter(fpr[eer_threshold_idx], tpr[eer_threshold_idx], color='red', s=100,
            label=f'EER = {eer:.3f} @ {eer_threshold:.3f}')

for far_point in far_thresholds:
    plt.scatter(far_point['FAR'], far_point['TAR'], s=100,
                label=f"FAR={far_point['FAR']:.3f} @ {far_point['Threshold']:.3f}")

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Accept Rate (FAR)')
plt.ylabel('True Accept Rate (TAR)')
plt.title('ROC Curve - Face Recognition')
plt.legend(loc="lower right")
plt.grid(True, alpha=0.3)
plt.show()

# Print recommendations (use cases are listed in the same order as far_targets)
print("\nRecommended Thresholds:")
print(f"EER Point: {eer_threshold:.3f} (Balanced approach)")
use_cases = ['High Security', 'Standard', 'Convenience']
for far_point, use_case in zip(far_thresholds, use_cases):
    print(f"FAR {far_point['FAR']:.3f}: {far_point['Threshold']:.3f} ({use_case})")

## 4. DET Curve Analysis

In [None]:
from scipy.stats import norm

# Compute FRR (False Reject Rate) at every ROC operating point as the
# complement of the true accept rate returned by roc_curve.
frr = 1 - tpr

# Convert to DET scale (probit)
def probit(p):
    """Map probabilities onto the probit (standard-normal quantile) scale.

    Values are clamped to [1e-7, 1 - 1e-7] before the transform so that
    probabilities of exactly 0 or 1 yield finite results.
    """
    eps = 1e-7
    clamped = np.clip(p, eps, 1 - eps)
    return norm.ppf(clamped)

# Plot DET curve
fig, ax = plt.subplots(figsize=(10, 8))

# Plot in probit scale
ax.plot(probit(fpr), probit(frr), 'b-', linewidth=2)

# Mark EER point
ax.scatter(probit(fpr[eer_threshold_idx]), probit(frr[eer_threshold_idx]),
           color='red', s=100, zorder=5, label=f'EER = {eer:.3f}')

# Mark FAR points. Each entry already stores its operating point, so plot
# (FAR, FRR = 1 - TAR) directly. Re-deriving the index with a nearest-FAR search
# is redundant and ambiguous when several ROC points share the same FAR.
for far_point in far_thresholds:
    ax.scatter(probit(far_point['FAR']), probit(1 - far_point['TAR']), s=100, zorder=5,
              label=f"FAR = {far_point['FAR']:.3f}")

# Set ticks in probability scale: positions are probit-transformed, labels stay
# in percent
ticks = [0.001, 0.01, 0.05, 0.1, 0.2, 0.5]
tick_labels = ['0.1%', '1%', '5%', '10%', '20%', '50%']

ax.set_xticks([probit(t) for t in ticks])
ax.set_xticklabels(tick_labels)
ax.set_yticks([probit(t) for t in ticks])
ax.set_yticklabels(tick_labels)

ax.set_xlabel('False Accept Rate (FAR)')
ax.set_ylabel('False Reject Rate (FRR)')
ax.set_title('DET Curve - Face Recognition')
ax.grid(True, alpha=0.3)
ax.legend()

plt.show()

## 5. Threshold Selection Tool

In [None]:
def evaluate_threshold(threshold, scores, labels):
    """Evaluate accept/reject performance at a specific similarity threshold.

    A pair is accepted when its score is >= ``threshold``.

    Args:
        threshold: Decision threshold applied to ``scores``.
        scores: Array-like of similarity scores, one per pair.
        labels: Array-like of ground-truth labels aligned with ``scores``;
            entries equal to 1 are genuine pairs, anything else is impostor.

    Returns:
        dict with keys 'Threshold', 'FAR', 'FRR', 'TAR', 'Accuracy' and the raw
        counts 'TP', 'TN', 'FP', 'FN' (plain Python ints). A rate whose
        denominator is zero (e.g. no impostor pairs for FAR) is reported as 0.
    """
    scores = np.asarray(scores)
    is_genuine = np.asarray(labels) == 1
    predictions = scores >= threshold

    # Count outcomes with boolean masks. Unpacking a confusion matrix into four
    # values only works when both classes are present; with single-class input
    # the matrix collapses to 1x1 and the unpacking raises.
    tp = int(np.count_nonzero(predictions & is_genuine))
    fp = int(np.count_nonzero(predictions & ~is_genuine))
    fn = int(np.count_nonzero(~predictions & is_genuine))
    tn = int(np.count_nonzero(~predictions & ~is_genuine))

    far = fp / (fp + tn) if (fp + tn) > 0 else 0
    frr = fn / (fn + tp) if (fn + tp) > 0 else 0
    tar = 1 - frr
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0

    return {
        'Threshold': threshold,
        'FAR': far,
        'FRR': frr,
        'TAR': tar,
        'Accuracy': accuracy,
        'TP': tp,
        'TN': tn,
        'FP': fp,
        'FN': fn
    }

# Candidate thresholds to compare side by side
test_thresholds = [0.5, 0.6, 0.65, 0.7, 0.75, 0.8]
results = [evaluate_threshold(candidate, all_scores, all_labels) for candidate in test_thresholds]

# Build the results table; F1 is derived from the raw confusion counts
df_results = pd.DataFrame(results)
double_tp = 2 * df_results['TP']
df_results['F1'] = double_tp / (double_tp + df_results['FP'] + df_results['FN'])

summary_columns = ['Threshold', 'FAR', 'FRR', 'TAR', 'Accuracy', 'F1']
print("\nThreshold Analysis Table:")
print(df_results[summary_columns].round(4).to_string(index=False))

# Visualize trade-offs: error rates on the left, quality scores on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
error_ax, quality_ax = axes
threshold_axis = df_results['Threshold']

# FAR vs FRR
for column, line_format in (('FAR', 'r-o'), ('FRR', 'b-o')):
    error_ax.plot(threshold_axis, df_results[column], line_format, label=column)
error_ax.set_xlabel('Threshold')
error_ax.set_ylabel('Error Rate')
error_ax.set_title('FAR vs FRR Trade-off')

# Accuracy and F1
for column, line_format, legend_text in (('Accuracy', 'g-o', 'Accuracy'), ('F1', 'm-o', 'F1 Score')):
    quality_ax.plot(threshold_axis, df_results[column], line_format, label=legend_text)
quality_ax.set_xlabel('Threshold')
quality_ax.set_ylabel('Score')
quality_ax.set_title('Accuracy and F1 Score')

# Shared cosmetics for both panels
for panel in (error_ax, quality_ax):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Export Configuration

In [None]:
# Operating point to export; tune this to your FAR/FRR requirements
selected_threshold = 0.65  # Adjust based on your requirements

final_metrics = evaluate_threshold(selected_threshold, all_scores, all_labels)

# Human-readable summary of the chosen operating point
banner = "="*50
print("\n" + banner)
print("FINAL THRESHOLD CONFIGURATION")
print(banner)
print(f"\nSelected Threshold: {selected_threshold}")
print(f"\nExpected Performance:")
print(f"  - False Accept Rate (FAR): {final_metrics['FAR']:.2%}")
print(f"  - False Reject Rate (FRR): {final_metrics['FRR']:.2%}")
print(f"  - True Accept Rate (TAR):  {final_metrics['TAR']:.2%}")
print(f"  - Overall Accuracy:        {final_metrics['Accuracy']:.2%}")

print(f"\n📝 Update your .env file:")
print(f"   SIMILARITY_THRESHOLD={selected_threshold}")

# Persist the threshold together with the validation metrics it was chosen on
config = {
    'threshold': selected_threshold,
    'expected_far': final_metrics['FAR'],
    'expected_frr': final_metrics['FRR'],
    'validation_auc': roc_auc,
    'validation_samples': len(all_labels)
}

import json
serialized_config = json.dumps(config, indent=2)
with open('threshold_config.json', 'w') as f:
    f.write(serialized_config)

print(f"\n✅ Configuration saved to threshold_config.json")