In [22]:
import cv2
import numpy as np
import faiss
import os
from deepface import DeepFace
import json

# Load YuNet model for face detection
yunet = cv2.FaceDetectorYN.create(
    model="face_detection_yunet_2023mar.onnx",  # Pre-trained ONNX model path
    config="",
    input_size=(320, 320),  # Input image size
    score_threshold=0.9,
    nms_threshold=0.3,
    top_k=5000
)

In [23]:
def detect_and_crop_faces(image_path, output_folder=None, return_boxes=False):
    """
    Detect faces in an image, crop them, optionally save them, and optionally return face bounding boxes.

    Args:
        image_path (str): Path to the input image.
        output_folder (str, optional): Folder to save cropped face images. If None, faces are not saved.
        return_boxes (bool): Whether to return face bounding boxes.

    Returns:
        list: List of cropped face images.
        list (optional): List of bounding boxes [(x, y, width, height), ...].
    """
    # Read the input image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Could not read {image_path}")
        return [] if not return_boxes else ([], [])

    # Set YuNet input size
    height, width = img.shape[:2]
    yunet.setInputSize((width, height))
    
    # Detect faces
    _, faces = yunet.detect(img)

    cropped_faces = []
    face_boxes = []

    if faces is not None:
        for idx, face in enumerate(faces):
            x, y, w, h = face[:4].astype(int)
            # Crop the face from the image
            cropped_face = img[y:y+h, x:x+w]
            cropped_faces.append(cropped_face)
            face_boxes.append((x, y, w, h))  # Store the bounding box coordinates
            
            # Save the cropped face if output_folder is provided
            if output_folder:
                os.makedirs(output_folder, exist_ok=True)  # Create the folder if it doesn't exist
                face_filename = os.path.join(
                    output_folder, f"{os.path.splitext(os.path.basename(image_path))[0]}_face_{idx}.jpg"
                )
                print(face_filename)
                cv2.imwrite(face_filename, cropped_face)

    if return_boxes:
        return cropped_faces, face_boxes
    return cropped_faces


In [24]:
# Step 1: Load the pre-trained VGG model

# Load VGG model for embeddings
model = "VGG-Face"  # You can also use 'Facenet512' for higher accuracy

def get_embeddings(face_images):
    embeddings = []
    for face_img in face_images:
        face_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)  # Convert to RGB
        embedding = DeepFace.represent(img_path=face_rgb, model_name=model,enforce_detection=False)[0]['embedding']
        embeddings.append(np.array(embedding, dtype=np.float32))
    return embeddings


In [25]:
def process_and_store_images(image_folder, face_folder, index, faiss_index_path, metadata_path):
    """
    Process images to detect faces, save cropped faces, and store embeddings in FAISS and metadata.

    Args:
        image_folder (str): Path to the folder containing input images.
        face_folder (str): Path to the folder to save cropped faces.
        index (faiss.IndexFlatL2): FAISS index for storing embeddings.
        faiss_index_path (str): Path to save the FAISS index.
        metadata_path (str): Path to save photo ID metadata.

    Returns:
        None
    """
    os.makedirs(face_folder, exist_ok=True)  # Ensure the face folder exists

    photo_ids = []  # Keep track of photo IDs, cropped face filenames, and embeddings

    for photo_id, image_file in enumerate(os.listdir(image_folder)):
        image_path = os.path.join(image_folder, image_file)
        print(image_path)
        faces = detect_and_crop_faces(image_path, face_folder, return_boxes=False)
        if faces:
            embeddings = get_embeddings(faces)
            for idx, (face, embedding) in enumerate(zip(faces, embeddings)):
                # Save the embedding to the FAISS index
                index.add(np.expand_dims(embedding, axis=0))
                
                # Save the cropped face image
                cropped_face_filename = f"{os.path.splitext(image_file)[0]}_face_{idx}.jpg"
                cropped_face_path = os.path.join(face_folder, cropped_face_filename)
                cv2.imwrite(cropped_face_path, face)

                # Add metadata entry
                photo_ids.append({
                    "original_image": image_file,
                    "cropped_face": cropped_face_filename,
                    "embedding": embedding.tolist()  # Convert embedding to a list for JSON serialization
                })
    
    # Save the FAISS index
    faiss.write_index(index, faiss_index_path)
    print(f"FAISS index saved to {faiss_index_path}")

    # Save metadata as JSON
    with open(metadata_path, 'w') as metadata_file:
        json.dump(photo_ids, metadata_file, indent=4)
    print(f"Metadata saved to {metadata_path}")

    print(f"Processed {len(photo_ids)} faces.")


In [26]:
def load_faiss_and_metadata(faiss_index_path, metadata_path):
    """
    Load the FAISS index and metadata for reuse.

    Args:
        faiss_index_path (str): Path to the FAISS index.
        metadata_path (str): Path to the metadata file.

    Returns:
        tuple: Loaded FAISS index and metadata.
    """
    # Load FAISS index
    index = faiss.read_index(faiss_index_path)
    print(f"Loaded FAISS index with {index.ntotal} embeddings.")

    # Load metadata
    with open(metadata_path, 'r') as metadata_file:
        photo_ids = json.load(metadata_file)
    print(f"Loaded metadata for {len(photo_ids)} faces.")

    return index, photo_ids


In [27]:
# Define the FAISS index
embedding_dimension = 4096  # FaceNet output dimension
index = faiss.IndexFlatL2(embedding_dimension)  # L2 (Euclidean) distance

# Folder and file paths
image_folder = "oscar/photos"
face_folder = "oscar/cropped_faces"
faiss_index_path = "oscar/faiss_index.bin"
metadata_path = "oscar/photo_ids.json"

# Process images and store data
process_and_store_images(image_folder, face_folder, index, faiss_index_path, metadata_path)


oscar/photos/.DS_Store
Could not read oscar/photos/.DS_Store
oscar/photos/8.jpg
oscar/cropped_faces/8_face_0.jpg
oscar/cropped_faces/8_face_1.jpg
oscar/photos/9.jpg
oscar/cropped_faces/9_face_0.jpg
oscar/cropped_faces/9_face_1.jpg
oscar/cropped_faces/9_face_2.jpg
oscar/cropped_faces/9_face_3.jpg
oscar/cropped_faces/9_face_4.jpg
oscar/cropped_faces/9_face_5.jpg
oscar/cropped_faces/9_face_6.jpg
oscar/cropped_faces/9_face_7.jpg
oscar/photos/12.jpg
oscar/cropped_faces/12_face_0.jpg
oscar/cropped_faces/12_face_1.jpg
oscar/cropped_faces/12_face_2.jpg
oscar/cropped_faces/12_face_3.jpg
oscar/photos/13.jpg
oscar/cropped_faces/13_face_0.jpg
oscar/cropped_faces/13_face_1.jpg
oscar/photos/11.jpg
oscar/cropped_faces/11_face_0.jpg
oscar/photos/10.jpg
oscar/cropped_faces/10_face_0.jpg
oscar/cropped_faces/10_face_1.jpg
oscar/cropped_faces/10_face_2.jpg
oscar/cropped_faces/10_face_3.jpg
oscar/photos/4.jpg
oscar/cropped_faces/4_face_0.jpg
oscar/photos/5.jpg
oscar/cropped_faces/5_face_0.jpg
oscar/cropped

In [29]:
def search_similar_faces(query_image, faiss_index_path, metadata_path, top_k=5):
    """
    Search for similar faces to the query image using the stored FAISS index and metadata.

    Args:
        query_image (str): Path to the query image.
        faiss_index_path (str): Path to the FAISS index file.
        metadata_path (str): Path to the metadata JSON file.
        top_k (int): Number of top matches to retrieve.

    Returns:
        None
    """
    # Load the FAISS index
    index = faiss.read_index(faiss_index_path)
    print(f"Loaded FAISS index from {faiss_index_path}")

    # Load the photo IDs metadata
    with open(metadata_path, 'r') as metadata_file:
        photo_ids = json.load(metadata_file)
    print(f"Loaded metadata from {metadata_path}")

    # Detect faces in the query image
    faces = detect_and_crop_faces(query_image)
    if not faces:
        print("No face detected in query image.")
        return

    # Generate embeddings for the detected faces
    query_embeddings = get_embeddings(faces)

    # Search the FAISS index for each query embedding
    for query_embedding in query_embeddings:
        distances, indices = index.search(np.expand_dims(query_embedding, axis=0), top_k)
        print("Top Matches:")
        for dist, idx in zip(distances[0], indices[0]):
            if idx >= 0:
                match_metadata = photo_ids[idx]
                print(f"Original Image: {match_metadata['original_image']}, "
                      f"Cropped Face: {match_metadata['cropped_face']}, Distance: {dist}")


In [30]:
# Paths to stored data
faiss_index_path = "oscar/faiss_index.bin"
metadata_path = "oscar/photo_ids.json"

# Query image
query_image = "oscar/cropped_faces/9_face_2.jpg"

# Search for similar faces
search_similar_faces(query_image, faiss_index_path, metadata_path, top_k=5)


Loaded FAISS index from oscar/faiss_index.bin
Loaded metadata from oscar/photo_ids.json
Top Matches:
Original Image: 9.jpg, Cropped Face: 9_face_2.jpg, Distance: 0.022086620330810547
Original Image: 2.jpg, Cropped Face: 2_face_0.jpg, Distance: 0.9272879958152771
Original Image: 3.jpg, Cropped Face: 3_face_0.jpg, Distance: 0.9644865393638611
Original Image: 1.jpg, Cropped Face: 1_face_0.jpg, Distance: 0.9995566606521606
Original Image: 10.jpg, Cropped Face: 10_face_2.jpg, Distance: 1.19015371799469
