In [132]:
import degirum as dg, degirum_tools
import cv2, numpy as np

In [133]:
# Inference host: where the models are executed
inference_host_address = "@cloud"
# inference_host_address = "@local"

# Model zoo the detection model is loaded from
zoo_url = "degirum/models_hailort"
# zoo_url = "<path to local folder>"

# Access token, obtained through degirum_tools
token = degirum_tools.get_token()
# token = '' # leave empty for local inference

# Image used for the detection and recognition demos below
image_source = "../assets/Friends.jpg"

**Face Detection and Keypoint Estimation with DeGirum PySDK** 

This section loads a YOLOv8 face detection model with keypoint estimation and applies it to an input image. The detected faces and their corresponding keypoints are drawn on the image, showing where each face and its landmarks are located. This setup is a good starting point for tasks such as face analysis, landmark detection, or pose estimation.

In [None]:
face_det_kypts_model_name = "yolov8n_relu6_widerface_kpts--640x640_quant_hailort_hailo8l_1"

# Load the face detection + keypoints model using the settings from the configuration cell
face_det_kypts_model = dg.load_model(
    model_name=face_det_kypts_model_name,
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
)

# Run the model on the configured image source
print(f" Running inference using '{face_det_kypts_model_name}' on image source '{image_source}'")
face_det_kypts_inference_result = face_det_kypts_model(image_source)

print('Inference Results \n', face_det_kypts_inference_result)  # Detection results with keypoints

# Print the key hint BEFORE opening the window so it is visible while the image
# is displayed (previously it was printed only after the display call returned).
# NOTE(review): assumes show_image() waits for a key press - confirm against degirum_tools.
print("Press 'x' or 'q' to stop.")

# Show the annotated inference result
with degirum_tools.Display("AI Camera") as output_display:
    output_display.show_image(face_det_kypts_inference_result)


**Face Re-identification with DeGirum PySDK**

This section loads an ArcFace-based face re-identification (ReID) model and applies it to the face crops obtained from the YOLOv8 face detection model with keypoint estimation. The model extracts unique features (embeddings) from each cropped face.

The output of the ReID model is a 512-dimensional embedding, which encapsulates the unique features of each cropped face, enabling precise identification and comparison across different images.

In [None]:
face_reid_model_name = "arcface_mobilefacenet--112x112_quant_hailort_hailo8_2"
# The ReID model lives in a different zoo. Keep it in its own variable so the
# global `zoo_url` used by the detection cell is not silently overwritten
# (which would break re-running that cell).
face_reid_zoo_url = "degirum/sandbox_shashi"

# Load the face re-identification model
face_reid_model = dg.load_model(
    model_name=face_reid_model_name,
    inference_host_address=inference_host_address,
    zoo_url=face_reid_zoo_url,
    token=token,
)
image = face_det_kypts_inference_result.image
# The slicing below already requires a NumPy (OpenCV-style) image, so .shape is available.
image_height, image_width = image.shape[:2]

cropped_faces = []
for face in face_det_kypts_inference_result.results:
    x1, y1, x2, y2 = map(int, face["bbox"])
    # Clamp the box to the image: a negative coordinate would otherwise be
    # interpreted as "count from the end" by NumPy slicing and give a wrong crop.
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, image_width), min(y2, image_height)
    if x2 <= x1 or y2 <= y1:
        continue  # Degenerate box: nothing to crop
    # Crop the face using corner coordinates (for a PIL image use image.crop((x1, y1, x2, y2)))
    cropped_faces.append(image[y1:y2, x1:x2])

if cropped_faces:
    # Display each cropped face in its own OpenCV window
    for idx, cropped_face in enumerate(cropped_faces):
        cv2.imshow(f"Cropped Face {idx + 1}", cropped_face)

    cv2.waitKey(0)  # Wait for a key press to close the windows
    cv2.destroyAllWindows()  # Close all OpenCV windows

    print(f" Running inference using '{face_reid_model_name}' on cropped face")
    face_reid_inference_result = face_reid_model(cropped_faces[0])  # Embedding for a single cropped face
    # Print the length of the embedding for a single face
    print ('Shape of a face embedding:', len(face_reid_inference_result.results[0]["data"][0]))
else:
    # Previously this case crashed with an IndexError on cropped_faces[0]
    print("No faces were detected, so there is nothing to crop or embed.")

**Align and Crop**

The `align_and_crop` function is designed to align and crop a face from an image based on a given set of landmarks. This is particularly useful in facial recognition tasks, where precise alignment of the face is necessary for accurate feature extraction.


In [137]:
from skimage import transform as trans
import numpy as np
import cv2

def align_and_crop(img, landmarks, image_size=112):
    """
    Warp a face onto the ArcFace reference layout and crop it to a square image.

    Args:
        img (np.ndarray): The full image (not a pre-cropped bounding box); this is the image that gets warped.
        landmarks (List[np.ndarray]): Exactly 5 keypoints as (x, y) coordinates, matched point-by-point
            against the reference template below (eyes, nose, mouth corners).
        image_size (int, optional): Side length of the square output. Must be a multiple of 112 or 128.
            Defaults to 112.

    Returns:
        Tuple[np.ndarray, np.ndarray]: The aligned face image (image_size x image_size) and the
        2x3 affine matrix that was applied.
    """
    # ArcFace reference template, expressed in a 112x112 frame
    reference_points = np.array(
        [
            [38.2946, 51.6963],  # Left eye
            [73.5318, 51.5014],  # Right eye
            [56.0252, 71.7366],  # Nose
            [41.5493, 92.3655],  # Left mouth corner
            [70.7299, 92.2041],  # Right mouth corner
        ],
        dtype=np.float32,
    )

    # Exactly five landmarks are required to match the template
    assert len(landmarks) == 5

    # The output size has to be a multiple of one of the two supported base sizes
    fits_112 = image_size % 112 == 0
    assert fits_112 or image_size % 128 == 0

    # The 112 layout needs no offset; the 128 layout shifts the template 8 px (scaled) to the right
    base_size, base_shift = (112.0, 0) if fits_112 else (128.0, 8.0)
    scale = float(image_size) / base_size
    x_shift = base_shift * scale

    # Scale the template to the requested size, then apply the horizontal shift
    target_points = reference_points * scale
    target_points[:, 0] += x_shift

    # Fit a similarity transform mapping the detected landmarks onto the template
    similarity = trans.SimilarityTransform()
    similarity.estimate(np.array(landmarks), target_points)

    # Top two rows of the 3x3 matrix form the 2x3 affine matrix expected by OpenCV
    M = similarity.params[0:2, :]

    # Warp the full image so the face lands in the template position
    aligned_img = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return aligned_img, M


#### Display the Aligned Faces

In [None]:
aligned_faces = []

# Take the image from the same result object as the landmarks, so the two can
# never get out of sync when cells are re-run in a different order.
detection_image = face_det_kypts_inference_result.image

for result in face_det_kypts_inference_result.results:
    landmarks = [landmark["landmark"] for landmark in result["landmarks"]]
    aligned_face, _ = align_and_crop(detection_image, landmarks)
    aligned_faces.append(aligned_face)

if aligned_faces:
    # cv2.waitKey(0) returns on ANY key, so say so, and say it before the window blocks
    print("Press any key to close the window.")
    # Display the aligned faces concatenated horizontally
    cv2.imshow('Aligned Faces', cv2.hconcat(aligned_faces))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    # cv2.hconcat raises on an empty list, so report the situation instead
    print("No faces were detected, so there are no aligned faces to display.")

**Similarity between two embeddings**

In face recognition, embeddings are numerical representations of a person's face. These embeddings capture the unique features of a face in a high-dimensional vector space, where similar faces lie closer together.

To measure the similarity between two face embeddings, the most common approach is to calculate a distance metric between the two vectors. The closer the vectors are in space, the more similar the faces are.
Two widely used metrics for this purpose are: 
1. Cosine Similarity  
2. Euclidean Distance

In [None]:
from sklearn.metrics.pairwise import cosine_similarity


def _first_face_embedding(image_path):
    """Detect the first face in an image, align it, and return its embedding.

    Args:
        image_path (str): Path of the image to process.

    Returns:
        np.ndarray: Embedding produced by the face re-identification model for the first detected face.

    Raises:
        ValueError: If the detection model finds no face in the image.
    """
    det_result = face_det_kypts_model(image_path)
    if not det_result.results:
        # Fail with a clear message instead of an IndexError on results[0]
        raise ValueError(f"No face detected in '{image_path}'.")

    # Use the landmarks of the first detected face to align and crop it
    face_landmarks = [point["landmark"] for point in det_result.results[0]["landmarks"]]
    aligned_face, _ = align_and_crop(det_result.image, face_landmarks)

    # Generate the face embedding using the face re-identification model
    reid_result = face_reid_model(aligned_face)
    return np.array(reid_result.results[0]["data"][0])


# Consider two different faces
input_image1 = "../assets/Friends_dataset/Chandler_1.jpg"
input_image2 = "../assets/Friends_dataset/Joey_1.jpg"

embedding1 = _first_face_embedding(input_image1)
embedding2 = _first_face_embedding(input_image2)

# Cosine similarity between the two embeddings (cell output)
cosine_similarity([embedding1], [embedding2])

In [None]:
# Two images of the same person
input_image1, input_image2 = (
    "../assets/Friends_dataset/Joey_1.jpg",
    "../assets/Friends_dataset/Joey_2.jpg",
)

# --- First image (Joey_1.jpg): detect -> align -> embed ---
face_det_kypts_inference_result1 = face_det_kypts_model(input_image1)
landmarks = [
    keypoint["landmark"]
    for keypoint in face_det_kypts_inference_result1.results[0]["landmarks"]
]
aligned_img = align_and_crop(face_det_kypts_inference_result1.image, landmarks)[0]
face_reid_inference_result1 = face_reid_model(aligned_img)
embedding1 = np.array(face_reid_inference_result1.results[0]["data"][0])

# --- Second image (Joey_2.jpg): detect -> align -> embed ---
face_det_kypts_inference_result2 = face_det_kypts_model(input_image2)
landmarks = [
    keypoint["landmark"]
    for keypoint in face_det_kypts_inference_result2.results[0]["landmarks"]
]
aligned_img = align_and_crop(face_det_kypts_inference_result2.image, landmarks)[0]
face_reid_inference_result2 = face_reid_model(aligned_img)
embedding2 = np.array(face_reid_inference_result2.results[0]["data"][0])

# Cosine similarity between the two embeddings (cell output)
from sklearn.metrics.pairwise import cosine_similarity
cosine_similarity([embedding1], [embedding2])


In the above similarity calculation, the two faces that are similar will produce a higher similarity score, indicating a closer match, while faces that are distinct will yield a lower similarity score, reflecting the greater dissimilarity between them.

## Database Indexing

**Database indexing** is a technique used to improve the speed of data retrieval operations on a database table. 
Indexes allow for quick access to data without the need to scan every row, making data retrieval more efficient. 
In the context of modern vector databases like LanceDB, indexing is crucial for fast querying, especially when dealing with large datasets, such as embeddings for machine learning or image recognition tasks.

The components in database indexing are:

    1. Creating a new database table / Use the existing table
    2. Database Schema
    3. Adding data to the database

**Database Configuration**

This section defines the key parameters for working with the database. The **URI** is used to establish a connection to the database.
The **table_name** is the new or existing table within the database where embeddings, identities, and associated metadata are stored; each entry in the table corresponds to one face and its associated data. The **input_path** is the location of the image data used to generate the embeddings.

In [141]:
# Location (URI) of the database that holds the face data
uri = "face_database"

# Table inside that database holding embeddings, identities and related metadata
table_name = "face"

# Directory with the sample images that get indexed
input_path = "../assets/Friends_dataset"


The **FaceRecognitionSchema** in LanceDB defines the structure and data types for storing face recognition-related data, such as face embeddings, identities, and metadata. This schema is used to ensure consistency when storing and querying face recognition data within a LanceDB database.

In [142]:
from lancedb.pydantic import LanceModel, Vector
import uuid

# Define the Lance schema for face recognition
class FaceRecognitionSchema(LanceModel):
    # One row per detected face: its embedding plus the metadata needed to trace it back.
    id: str  # Unique identifier for each entry
    vector: Vector(512)  # Face embedding, fixed size of 512
    image_path: str = "image_path"  # Default image path
    entity_name: str = "default"  # Default entity name
    bbox: Vector(4)  # Bounding box as reported by the detector; this notebook treats it as corners (x1, y1, x2, y2)
    source: int = 0  # Source, default is 0

    @classmethod
    def format_data(cls, result, image_path, entity_name) -> "list[FaceRecognitionSchema]":
        """Convert an inference result into a list of schema instances.

        Only entries of ``result.results`` that carry an ``"embedding"`` key are
        converted; all others are skipped.

        Args:
            result: Inference result whose ``results`` attribute is a list of dicts,
                each with a ``"bbox"`` and (for faces to be stored) an ``"embedding"``.
            image_path: The path to the image associated with the entries.
            entity_name: Name of the entity (person) shown in the image. Required.

        Returns:
            A list of FaceRecognitionSchema instances, one per entry that has an
            embedding. The list is empty when no entry has one.
        """

        data = [
            cls(
                id=str(uuid.uuid4()),  # Generate a unique ID for each entry
                vector=np.array(res["embedding"], dtype=np.float32),  # Embedding as a float32 NumPy array
                image_path=image_path,  # Set the image path
                entity_name=entity_name,  # Set the entity name
                bbox=np.array(res["bbox"], dtype=np.float32)  # Bounding box as a float32 NumPy array
            )
            for res in result.results if "embedding" in res
        ]
        return data

In [143]:
# Load the configuration
import lancedb
# Open a connection to the LanceDB database at `uri`
db = lancedb.connect(uri=uri)

if table_name in db.table_names():
    # The table already exists: open it and make sure its columns match the schema
    tbl = db.open_table(table_name)
    schema_fields = [field.name for field in tbl.schema]
    expected_fields = list(FaceRecognitionSchema.model_fields.keys())
    if schema_fields != expected_fields:
        raise RuntimeError(
            f"Table {table_name} has a different schema."
        )
else:
    # First run: create an empty table with the face recognition schema
    tbl = db.create_table(table_name, schema=FaceRecognitionSchema)

The following code processes face images stored in a specified directory and applies face detection using face detection+keypoints model, cropping and alignment using the 5 facial landmarks, and feature extraction using the face-reid models. The extracted features are then stored in a LanceDB table with a specific schema for future retrieval and comparison.

In [None]:
from pathlib import Path

path = Path(input_path)

# Image paths that already have rows in the table. Skipping them makes this cell
# safe to re-run: previously every re-run appended duplicate rows for the same images.
indexed_paths = set(tbl.to_arrow().column("image_path").to_pylist())

num_entities = 0  # Number of entities added to the database by this run
# Iterate over all files in the directory (and subdirectories)
for file in path.rglob("*"):
    # Only image files are indexed (decided by extension)
    if file.suffix.lower() not in (".png", ".jpg", ".jpeg"):
        continue

    image_path = str(file)  # Path to the image as a string
    if image_path in indexed_paths:
        continue  # Already indexed in a previous run

    entity_name = file.stem.split("_")[0]  # Entity name is the part of the file name before the first "_"

    # Face detection + keypoints. A dedicated variable is used so the detection
    # result of the demo image from the earlier cells is not overwritten.
    dataset_det_result = face_det_kypts_model(image_path)

    # Compute an embedding for every detected face
    for result in dataset_det_result.results:
        # Extract the facial landmarks and use them to align and crop the face
        landmarks = [landmark["landmark"] for landmark in result["landmarks"]]
        aligned_img, _ = align_and_crop(dataset_det_result.image, landmarks)

        # Generate the face embedding and attach it to the detection entry
        face_reid_inference_result = face_reid_model(aligned_img)
        result["embedding"] = face_reid_inference_result.results[0]["data"][0]

    # Format the extracted data (face metadata and embeddings) for storage in the database
    data = FaceRecognitionSchema.format_data(dataset_det_result, image_path, entity_name)

    # Only write when at least one face was found in the image
    if len(data) > 0:
        tbl.add(data=data)
        indexed_paths.add(image_path)

    num_entities += len(data)

print(f"Successfully added {num_entities} entities to the {table_name} table.")  # Print number of entities added
print(f"{table_name} table contains a total of {tbl.count_rows()} entities.")  # Print total number of entities in the table


**Face Identification/Recognition**

This typically involves comparing a face captured in an image or video against a database of known faces. The goal is to match the query face with one in the database to either identify the person or authenticate their identity.

Search parameters: 

1. **Top-K** - The Top-K parameter defines the number of closest or most relevant results to return from a search query. In the context of face recognition, this often means retrieving the top K most similar face embeddings to a given query face embedding from the database.
2. **Field_name** - Field_name refers to the specific field or column within the database that will be searched. This could refer to attributes like facial embeddings, identity names, timestamps, or other metadata associated with the faces in the database.
3. **Metric type** - The Metric Type defines the similarity measure used to compare the face embeddings in the database during the search process. It is a critical parameter for determining how the system calculates the "closeness" or "similarity" between faces.


In [145]:
# Number of nearest matches to retrieve per query face
top_k = 1
# Name of the table column that is searched
field_name = "vector"
# Metric used to compare embeddings
metric_type = "cosine"

The **process_face_result** function processes a set of face detection results along with the extracted embeddings, searches for the most similar faces in a database, calculates the similarity score (distance), and assigns labels to each face based on the similarity threshold. 

If the calculated similarity score (computed as 1 − distance) is greater than or equal to a predefined threshold, the detected face is labeled with the identity from the database. Otherwise, the face is labeled as "Unknown". This threshold allows for rejecting faces with low similarity scores.

In [146]:
def process_face_result(tbl, result, field_name, metric_type, top_k, threshold=0.3):
    """Label every face in ``result`` with its best match from the database.

    For each entry in ``result.results`` the stored ``"embedding"`` is used to
    query ``tbl``. The similarity score is ``1 - _distance`` of the best match,
    rounded to two decimals. The entry is updated in place with ``"label"`` and
    ``"score"``, and its ``"landmarks"`` and ``"embedding"`` keys are removed.

    Args:
        tbl: Database table supporting ``search(...).metric(...).limit(...).to_list()``.
        result: Object whose ``results`` attribute is a list of dicts, each holding an ``"embedding"``.
        field_name: Name of the vector column to search.
        metric_type: Metric passed to the search (e.g. "cosine").
        top_k: Maximum number of matches to retrieve per face; only the first one is used.
        threshold: Minimum similarity score for accepting the match. Defaults to 0.3.

    Returns:
        The same ``result`` object, with its entries updated in place.
    """
    for res in result.results:
        # Query the database with the face embedding
        query_vector = np.array(res["embedding"]).astype(np.float32)
        matches = (
            tbl.search(
                query_vector,
                vector_column_name=field_name,  # Name of the vector column in the database
            )
            .metric(metric_type)
            .limit(top_k)
            .to_list()
        )

        if matches:
            best_match = matches[0]
            # Turn the reported distance into a similarity score
            similarity = round(1 - best_match["_distance"], 2)
            label = best_match["entity_name"] if similarity >= threshold else "Unknown"
        else:
            # Empty table / no search hits: previously an IndexError on search_result[0]
            similarity = 0.0
            label = "Unknown"

        res["label"] = label
        res["score"] = similarity  # Store the calculated similarity score

        # Clean up fields that are no longer needed
        res.pop("landmarks", None)
        res.pop("embedding", None)

    return result

In [147]:
# Re-open the face table before searching. `db` is the connection created in the
# table setup cell; uncomment the next line to reconnect if needed.
# db = lancedb.connect(uri=uri)
if table_name in db.table_names():
    tbl = db.open_table(table_name)
    # The column names must match the schema fields, in the same order
    schema_fields = [field.name for field in tbl.schema]
    expected_fields = list(FaceRecognitionSchema.model_fields.keys())
    if schema_fields != expected_fields:
        raise RuntimeError(f"Table {table_name} has a different schema.")

The following code takes an input image (query image), processes it to extract facial features, and then performs face recognition by comparing the extracted features with stored facial embeddings in a database (LanceDB).
The results are returned as "search_result", which contains the recognized face's label and the calculated similarity score.

In [148]:
# Detect faces and keypoints in the query image
det_result = face_det_kypts_model(image_source)
image = det_result.image

# Attach an embedding to every detected face
for result in det_result.results:
    landmarks = [keypoint["landmark"] for keypoint in result["landmarks"]]
    aligned_img = align_and_crop(det_result.image, landmarks)[0]
    face_reid_inference_result = face_reid_model(aligned_img)
    face_embedding = face_reid_inference_result.results[0]["data"][0]
    result["embedding"] = face_embedding

# Look up each face in the database and label it
search_result = process_face_result(tbl, det_result, field_name, metric_type, top_k)
        

## Display

In [149]:
from degirum_tools.ui_support import Display
# Plain string: the original f-string had no placeholders
win_name = "Annotated Image"
display = Display(win_name)
img = search_result.image_overlay
display.show(img)

# Tell the user how to leave the loop below; without this hint the cell
# blocks with no indication of which keys end it.
print("Press 'x' or 'q' to stop.")

# Poll the keyboard until 'x' or 'q' is pressed
while True:
    key = cv2.waitKey(1) & 0xFF  # Low byte of the key code
    if key in (ord('x'), ord('q')):
        break

cv2.destroyAllWindows()  # Close the window