# Imports

In [2]:
from tqdm import tqdm
import numpy as np
import tensorflow
import numpy as np
import os




In [3]:
# Bare expression: the notebook displays whatever object `tensorflow.keras` resolves to.
tensorflow.keras

<KerasLazyLoader (keras_2) keras.api._v2.keras as keras mode=None>

In [None]:

# from sklearn.model_selection import train_test_split
from deepface import DeepFace
import faiss

# Required parameters

In [5]:
# Root folder of the image dataset; passed to load_images_and_embeddings.
DATASET_PATH = "./Dataset_cropped_MLclass"  # change this as needed
# Model name handed to DeepFace.represent when a query image is embedded.
MODEL_NAME = "Facenet"

Empty lists that will hold the embeddings, labels, and image paths

In [6]:
# Module-level containers, all starting out empty.
# `embeddings` and `labels` are reassigned once the dataset has been processed.
embeddings, labels = [], []
# Placeholders for the full list of image paths and their labels.
all_image_paths, all_labels = [], []

## Load all the images from the folders and generate embeddings
The dataset folder contains one sub-folder per person, and each sub-folder holds that person's images:

|- person1  
|---- img1  
|---- img2  
|- person2  
|---- img1  

In [None]:
def load_images_and_embeddings(dataset_path, model_name="Facenet", generate_embeddings=True) -> dict:
    """Collect every image under ``dataset_path`` and optionally embed it with DeepFace.

    The dataset is expected to contain one sub-folder per person; the folder
    name becomes the label of every image inside it. Only files whose name ends
    in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) are picked up, and
    anything at the top level that is not a directory is ignored.

    Folders and files are visited in sorted order. ``os.listdir`` gives no
    ordering guarantee, and the order produced here is what links row ``i`` of
    the embeddings to ``labels[i]`` and ``paths[i]``, so it must be the same on
    every run.

    Args:
        dataset_path: Root folder holding one sub-folder per person.
        model_name: Model name forwarded to ``DeepFace.represent``.
        generate_embeddings: When False, only the file listing is built and
            DeepFace is never called.

    Returns:
        A dict with four entries that share the same ordering:
        ``"paths"`` (full image paths), ``"labels"`` (person folder names),
        ``"filenames"`` (bare file names) and ``"embeddings"``.
        ``"embeddings"`` is ``None`` when ``generate_embeddings`` is False;
        otherwise it is a list with one entry per image, where an image that
        could not be processed is represented by ``None``.
    """
    image_extensions = (".jpg", ".jpeg", ".png")

    image_paths = []
    labels = []
    filenames = []
    embeddings = []

    # sorted(): make the row order deterministic across runs and machines.
    for person_name in sorted(os.listdir(dataset_path)):
        person_folder = os.path.join(dataset_path, person_name)
        if not os.path.isdir(person_folder):
            continue

        for img_name in sorted(os.listdir(person_folder)):
            if img_name.lower().endswith(image_extensions):
                img_path = os.path.join(person_folder, img_name)
                image_paths.append(img_path)
                labels.append(person_name)
                filenames.append(img_name)

    if generate_embeddings:
        for img_path in tqdm(image_paths, desc="Generating Embeddings"):
            try:
                # represent() returns a list; only its first entry is used.
                result = DeepFace.represent(
                    img_path=img_path,
                    model_name=model_name,
                    enforce_detection=False
                )[0]
                embeddings.append(result["embedding"])
            except Exception as e:
                print(f"Failed to process {img_path}: {e}")
                # Placeholder keeps embeddings[i] lined up with image_paths[i];
                # callers must filter these out before building an array.
                embeddings.append(None)

    return {
        "paths": image_paths,
        "labels": labels,
        "filenames": filenames,
        "embeddings": embeddings if generate_embeddings else None
    }


# What DeepFace does automatically:
- Face Detection: Uses a detector backend such as OpenCV, RetinaFace, or MTCNN to find the face in the image.

- Face Alignment: Aligns the detected face by rotating and centering based on eye/mouth positions.

- Image Resizing: Resizes the face to the exact input size required by the selected model (e.g., 160×160 for Facenet, 112×112 for ArcFace).

- Normalization & Tensor Conversion: Converts the image to the proper format (e.g., pixel value scaling, tensor dtype, channel ordering).

- Model Inference: Passes the processed face through the model to get the embedding.

In [9]:
# Usage: embed the whole dataset.
# The model comes from MODEL_NAME so that the vectors stored in the index and the
# query vectors built in search_similar_faces are produced by the same model.
data = load_images_and_embeddings(DATASET_PATH, model_name=MODEL_NAME, generate_embeddings=True)

# Images that could not be embedded come back as a None placeholder.
# Drop them from every list together: row i of the embedding matrix must keep
# matching labels[i] / image_paths[i], and np.array() needs rectangular input.
valid_rows = [row for row, emb in enumerate(data["embeddings"]) if emb is not None]

# Access results
image_paths = [data["paths"][row] for row in valid_rows]
labels = [data["labels"][row] for row in valid_rows]
filenames = [data["filenames"][row] for row in valid_rows]
embeddings = [data["embeddings"][row] for row in valid_rows]

Generating Embeddings: 100%|██████████| 1180/1180 [08:03<00:00,  2.44it/s]


# Save and load the embeddings and labels for future use

In [11]:
# Persist the embeddings and their labels as .npy files so they can be
# reloaded later without recomputing them.
np.save("embeddings.npy", np.array(embeddings))
np.save("labels.npy", np.array(labels))

In [12]:
# Reload the embeddings and labels from the .npy files.
# NOTE(review): image_paths is neither saved nor reloaded here, yet
# search_similar_faces reads it — after a kernel restart it would have to be
# rebuilt in the same order; confirm this is intended.
embeddings = np.load('embeddings.npy')
labels = np.load('labels.npy')

In [13]:
# Build the embedding matrix as a fresh float32 array
embedding_matrix = np.array(embeddings, dtype="float32")

### The shape is given by (number of images, embedding dimension returned by Facenet),  
which is (1180, 128)


In [14]:
# Display the matrix shape: (number of rows, embedding dimension).
embedding_matrix.shape

(1180, 128)

# Create the Vector DB

In [15]:
# Cosine similarity is computed as the inner product of unit-length vectors,
# so first scale every row of the matrix to unit L2 norm (done in place).
faiss.normalize_L2(embedding_matrix)

# Flat inner-product (IP) index sized to the embedding dimension
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatIP(dimension)


In [16]:
# Empty the index first so that re-running this cell does not append the same
# vectors a second time; a second copy would receive row ids beyond the end of
# labels / image_paths and break the lookup of search results.
index.reset()
index.add(embedding_matrix)

In [17]:
# Write the index to disk so it can be reused without rebuilding it.
faiss.write_index(index, "face_index.faiss")

In [18]:
# Load the index from disk, replacing the in-memory `index`.
index = faiss.read_index('face_index.faiss')

In [None]:
def search_similar_faces(img_path, k=5) -> list[tuple]:
    """Find the indexed faces most similar to the face in ``img_path``.

    The query image is embedded with DeepFace using ``MODEL_NAME``,
    L2-normalised, and looked up in the module-level FAISS ``index``. Each hit
    is mapped back through the module-level ``labels`` and ``image_paths``
    sequences, which must be in the same row order as the vectors that were
    added to the index.

    Args:
        img_path: Path of the query image.
        k: Maximum number of neighbours to return.

    Returns:
        A list of ``(label, file_path, similarity)`` tuples in the order the
        index returned them. It holds fewer than ``k`` entries when the index
        cannot supply ``k`` neighbours. If anything fails, the error is printed
        and an empty list is returned.
    """
    try:
        # Step 1: Generate embedding from image
        result = DeepFace.represent(
            img_path=img_path,
            model_name=MODEL_NAME,
            enforce_detection=False
        )[0]

        # Shape (1, dim): FAISS searches a batch of query rows.
        query_vector = np.array([result["embedding"]]).astype("float32")
        faiss.normalize_L2(query_vector)

        # Step 2: Search in FAISS
        distances, indices = index.search(query_vector, k)

        # Step 3: Get results
        results = []
        for idx, sim in zip(indices[0], distances[0]):
            # FAISS fills missing neighbours with id -1. Indexing with -1 would
            # silently return the LAST label/path as if it were a real match.
            if idx < 0:
                continue
            results.append((labels[idx], image_paths[idx], sim))

        return results

    except Exception as e:
        print(f"Error during search: {e}")
        return []


In [27]:
# query_image = "harshimage.jpg"
# Image to look up; k=1 asks for a single match.
query_image = '0220_18.jpeg'
matches = search_similar_faces(query_image, k=1)

# One line per match; nothing is printed when the search returned an empty list.
for label, filepath, similarity in matches:
    print(f"Matched: {label} | File: {filepath} | Similarity: {similarity:.4f}")


Matched: xxxx