In [8]:
import os
import cv2
import random
import numpy as np

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go


from deepface import DeepFace

from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score


from collections import Counter

# Step 1: Evaluate DeepFace Models

- Facenet
- Facenet512
- GhostFaceNet
- ArcFace
- SFace
- Dlib

In [9]:
def _first_face_embedding(img_path, model_name, enforce_detection):
    """Return the first embedding DeepFace reports for ``img_path``.

    Returns a float32 NumPy array, or ``None`` when ``DeepFace.represent``
    returns an empty result. Exceptions raised by DeepFace are NOT caught here
    so that each caller can report them with its own message.
    """
    rep = DeepFace.represent(
        img_path,
        model_name=model_name,
        enforce_detection=enforce_detection,
    )
    if not rep:
        return None
    # Only the first returned entry is used.
    return np.array(rep[0]["embedding"], dtype=np.float32)


def evaluate_multiclass_one_ori_against_aug(
    data_dir,
    model_name="Facenet",
    enforce_detection=False,
    title="Multi-class Face Recognition (One ORI vs. AUG)"
):
    """
    Performs multi-class face recognition in a dataset where each subfolder represents a class.
    Each class folder contains multiple images, including at least one 'ori_' image and
    multiple 'aug_' images.

    1. For each class, pick exactly one 'ori_' image as the reference (enrollment) image.
       Folders and files are visited in sorted order, so the chosen reference is the
       alphabetically first 'ori_' file and the result is reproducible across runs.
    2. Compute embeddings for all 'aug_' images across all classes, comparing each to every
       class's reference embedding (Euclidean distance).
    3. Assign each 'aug_' image to the class with the nearest reference embedding.
       Images whose embedding cannot be computed are predicted as "Unknown".
    4. Print accuracy / macro precision / macro recall / macro F1 and a full
       multi-class classification report.

    Args:
        data_dir: Directory whose immediate subfolders are the classes.
        model_name: Model name forwarded to ``DeepFace.represent``.
        enforce_detection: Forwarded to ``DeepFace.represent``.
        title: Heading printed above the metrics.

    Returns:
        ``{"accuracy", "precision", "recall", "f1_score"}`` as a dict, or an empty
        dict when no class has a usable reference embedding or no 'aug_' image exists.

    Notes:
        A file is treated as 'ori_' / 'aug_' when that marker appears anywhere in its
        base name (substring match), and only .jpg/.jpeg/.png files are considered.
    """
    image_exts = (".jpg", ".jpeg", ".png")

    # 1. Gather all classes and separate 'ori_' from 'aug_' images.
    # os.scandir / os.listdir return entries in arbitrary order; sort them so the
    # reference pick and the class iteration order are deterministic.
    with os.scandir(data_dir) as entries:
        subfolders = sorted(entry.path for entry in entries if entry.is_dir())

    # { class_name: embedding_of_one_ori (as a NumPy array) }
    class_reference_embeddings = {}
    # list of (img_path, class_name)
    test_images = []

    for subfolder in subfolders:
        class_name = os.path.basename(subfolder)
        all_images = [
            os.path.join(subfolder, img)
            for img in sorted(os.listdir(subfolder))
            if img.lower().endswith(image_exts)
        ]

        ori_imgs = [img for img in all_images if "ori_" in os.path.basename(img)]
        aug_imgs = [img for img in all_images if "aug_" in os.path.basename(img)]

        if not ori_imgs:
            # Without a reference image the class cannot be enrolled.
            print(f"Warning: No 'ori_' image found for class '{class_name}'. Skipping.")
            continue

        reference_img = ori_imgs[0]  # first in sorted order
        try:
            reference_emb = _first_face_embedding(reference_img, model_name, enforce_detection)
        except Exception as e:
            print(f"Error generating embedding for {reference_img}: {e}")
            continue

        if reference_emb is None:
            print(f"Warning: No embedding found for {reference_img}. Skipping class '{class_name}'")
            continue

        class_reference_embeddings[class_name] = reference_emb

        # Only classes with a valid reference contribute test images.
        test_images.extend((aug_img, class_name) for aug_img in aug_imgs)

    if not class_reference_embeddings:
        print("No classes with valid reference embeddings found.")
        return {}

    if not test_images:
        print("No test (aug) images found.")
        return {}

    # 2. For each test image, find the class with the nearest reference embedding
    true_labels = []
    pred_labels = []

    for img_path, true_class in test_images:
        # Appended up front so true/pred lists stay aligned on every path below.
        true_labels.append(true_class)

        try:
            emb = _first_face_embedding(img_path, model_name, enforce_detection)
        except Exception as e:
            print(f"Error generating embedding for {img_path}: {e}")
            pred_labels.append("Unknown")
            continue

        if emb is None:
            # Previously silent; report it like the other failure paths.
            print(f"Warning: No embedding found for {img_path}. Predicting 'Unknown'.")
            pred_labels.append("Unknown")
            continue

        # Nearest reference by Euclidean distance; strict '<' keeps the first
        # class on ties and never selects a NaN/inf distance.
        best_class = None
        best_dist = float("inf")
        for c_name, c_emb in class_reference_embeddings.items():
            dist = np.linalg.norm(emb - c_emb)
            if dist < best_dist:
                best_dist = dist
                best_class = c_name

        pred_labels.append(best_class if best_class is not None else "Unknown")

    # 3. Compute multi-class metrics
    all_classes = sorted(class_reference_embeddings.keys())
    # "Unknown" is only listed in the report when it was actually predicted.
    if "Unknown" in pred_labels:
        all_classes += ["Unknown"]

    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, average="macro", zero_division=0)
    recall = recall_score(true_labels, pred_labels, average="macro", zero_division=0)
    f1 = f1_score(true_labels, pred_labels, average="macro", zero_division=0)

    print(title)
    print("=" * len(title))
    print(f"Accuracy : {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall   : {recall:.4f}")
    print(f"F1-score : {f1:.4f}\n")

    # Print full classification report (classification_report is imported at file level)
    report = classification_report(true_labels, pred_labels, labels=all_classes, zero_division=0)
    print("Full Classification Report:")
    print(report)

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }

In [10]:
# Test dataset directory; the evaluator treats every subfolder as one class.
test_data_dir = "lfw-deepfunneled-augmentedV1-test"

# Run the one-ORI-vs-AUG evaluation with the Facenet model.
evaluate_multiclass_one_ori_against_aug(
    test_data_dir,
    model_name="Facenet",
    enforce_detection=False,
    title="DeepFace - Facenet Multi-class Face Recognition (One ORI vs. AUG)",
)

DeepFace - Facenet Multi-class Face Recognition (One ORI vs. AUG)
Accuracy : 0.8780
Precision: 0.9371
Recall   : 0.8780
F1-score : 0.8887

Full Classification Report:
                               precision    recall  f1-score   support

                Barry_Alvarez       1.00      1.00      1.00        10
            Begum_Khaleda_Zia       0.56      0.50      0.53        10
                  Bill_Nelson       1.00      1.00      1.00        10
             Boris_Berezovsky       1.00      1.00      1.00        10
                  Brian_Cowen       1.00      1.00      1.00        10
                   Cesar_Maia       1.00      0.90      0.95        10
              Charles_Kartman       1.00      1.00      1.00        10
              Charles_Schumer       1.00      0.80      0.89        10
                Chen_Liang_Yu       1.00      1.00      1.00        10
                 Chris_Tucker       1.00      1.00      1.00        10
                  Chuck_Amato       1.00      1.00 

{'accuracy': 0.878,
 'precision': 0.9370620929249961,
 'recall': 0.878,
 'f1_score': 0.8887110364750167}

In [11]:
# Same evaluation on the same test directory, using the Facenet512 model.
evaluate_multiclass_one_ori_against_aug(
    test_data_dir,
    model_name="Facenet512",
    enforce_detection=False,
    title="DeepFace - Facenet512 Multi-class Face Recognition (One ORI vs. AUG)",
)

DeepFace - Facenet512 Multi-class Face Recognition (One ORI vs. AUG)
Accuracy : 0.9110
Precision: 0.9352
Recall   : 0.9110
F1-score : 0.9101

Full Classification Report:
                               precision    recall  f1-score   support

                Barry_Alvarez       1.00      1.00      1.00        10
            Begum_Khaleda_Zia       0.71      0.50      0.59        10
                  Bill_Nelson       1.00      1.00      1.00        10
             Boris_Berezovsky       1.00      1.00      1.00        10
                  Brian_Cowen       1.00      1.00      1.00        10
                   Cesar_Maia       1.00      0.90      0.95        10
              Charles_Kartman       0.67      1.00      0.80        10
              Charles_Schumer       1.00      0.80      0.89        10
                Chen_Liang_Yu       1.00      1.00      1.00        10
                 Chris_Tucker       0.77      1.00      0.87        10
                  Chuck_Amato       1.00      1.

{'accuracy': 0.911,
 'precision': 0.9351989676989677,
 'recall': 0.9109999999999999,
 'f1_score': 0.9101488083340956}

In [12]:
# Same evaluation on the same test directory, using the GhostFaceNet model.
evaluate_multiclass_one_ori_against_aug(
    test_data_dir,
    model_name="GhostFaceNet",
    enforce_detection=False,
    title="DeepFace - GhostFaceNet Multi-class Face Recognition (One ORI vs. AUG)",
)

DeepFace - GhostFaceNet Multi-class Face Recognition (One ORI vs. AUG)
Accuracy : 0.9200
Precision: 0.9663
Recall   : 0.9200
F1-score : 0.9303

Full Classification Report:
                               precision    recall  f1-score   support

                Barry_Alvarez       1.00      1.00      1.00        10
            Begum_Khaleda_Zia       1.00      1.00      1.00        10
                  Bill_Nelson       1.00      1.00      1.00        10
             Boris_Berezovsky       1.00      1.00      1.00        10
                  Brian_Cowen       1.00      1.00      1.00        10
                   Cesar_Maia       1.00      0.90      0.95        10
              Charles_Kartman       1.00      1.00      1.00        10
              Charles_Schumer       1.00      0.80      0.89        10
                Chen_Liang_Yu       1.00      1.00      1.00        10
                 Chris_Tucker       1.00      1.00      1.00        10
                  Chuck_Amato       1.00      

{'accuracy': 0.92,
 'precision': 0.9663131313131313,
 'recall': 0.92,
 'f1_score': 0.9303175300104093}

In [13]:
# Same evaluation on the same test directory, using the ArcFace model.
evaluate_multiclass_one_ori_against_aug(
    test_data_dir,
    model_name="ArcFace",
    enforce_detection=False,
    title="DeepFace - ArcFace Multi-class Face Recognition (One ORI vs. AUG)",
)

DeepFace - ArcFace Multi-class Face Recognition (One ORI vs. AUG)
Accuracy : 0.8920
Precision: 0.9389
Recall   : 0.8920
F1-score : 0.9019

Full Classification Report:
                               precision    recall  f1-score   support

                Barry_Alvarez       1.00      1.00      1.00        10
            Begum_Khaleda_Zia       0.73      0.80      0.76        10
                  Bill_Nelson       1.00      1.00      1.00        10
             Boris_Berezovsky       1.00      1.00      1.00        10
                  Brian_Cowen       1.00      1.00      1.00        10
                   Cesar_Maia       0.56      0.50      0.53        10
              Charles_Kartman       1.00      1.00      1.00        10
              Charles_Schumer       1.00      0.80      0.89        10
                Chen_Liang_Yu       1.00      1.00      1.00        10
                 Chris_Tucker       1.00      0.80      0.89        10
                  Chuck_Amato       1.00      1.00 

{'accuracy': 0.892,
 'precision': 0.9389350926850927,
 'recall': 0.8919999999999999,
 'f1_score': 0.901940015915973}

In [14]:
# Same evaluation on the same test directory, using the SFace model.
evaluate_multiclass_one_ori_against_aug(
    test_data_dir,
    model_name="SFace",
    enforce_detection=False,
    title="DeepFace - SFace Multi-class Face Recognition (One ORI vs. AUG)",
)

DeepFace - SFace Multi-class Face Recognition (One ORI vs. AUG)
Accuracy : 0.8730
Precision: 0.9630
Recall   : 0.8730
F1-score : 0.8966

Full Classification Report:
                               precision    recall  f1-score   support

                Barry_Alvarez       1.00      1.00      1.00        10
            Begum_Khaleda_Zia       1.00      1.00      1.00        10
                  Bill_Nelson       1.00      1.00      1.00        10
             Boris_Berezovsky       1.00      1.00      1.00        10
                  Brian_Cowen       0.77      1.00      0.87        10
                   Cesar_Maia       1.00      0.50      0.67        10
              Charles_Kartman       1.00      1.00      1.00        10
              Charles_Schumer       1.00      0.80      0.89        10
                Chen_Liang_Yu       1.00      1.00      1.00        10
                 Chris_Tucker       1.00      0.50      0.67        10
                  Chuck_Amato       1.00      1.00   

{'accuracy': 0.873,
 'precision': 0.9629971417471417,
 'recall': 0.873,
 'f1_score': 0.8965604644603837}