<a href="https://colab.research.google.com/github/khuramgill/Face-Embaddings/blob/main/Face_Embaddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install deepface
!pip install pinecone-client
!pip install tqdm

Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━

**Import Libraries**

In [2]:
import os
import numpy as np
from deepface import DeepFace
from typing import List, Dict, Tuple
import pinecone
from datetime import datetime
import uuid
from google.colab import files
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2
import json

24-11-12 11:53:31 - Directory /root/.deepface has been created
24-11-12 11:53:31 - Directory /root/.deepface/weights has been created


# **FaceEmbeddingSystem**

In [3]:
class FaceEmbeddingSystem:
    def __init__(
        self,
        pinecone_api_key: str,
        pinecone_environment: str,
        index_name: str,
        model_name: str = "VGG-Face",  # Options: "VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib"
        dimension: int = 2622  # Dimension varies by model: VGG-Face=2622, Facenet=128, Facenet512=512
    ):
        """
        Initialize the face embedding system

        Args:
            pinecone_api_key: Your Pinecone API key
            pinecone_environment: Pinecone environment
            index_name: Name for Pinecone index
            model_name: Name of the face recognition model to use
            dimension: Embedding dimension (depends on model)
        """
        self.model_name = model_name

        # Initialize Pinecone
        pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)

        # Create index if it doesn't exist
        if index_name not in pinecone.list_indexes():
            pinecone.create_index(
                name=index_name,
                dimension=dimension,
                metric="cosine"
            )

        self.index = pinecone.Index(index_name)

    def visualize_faces(self, image_path: str):
        """
        Detect and visualize faces in an image
        """
        # Read image
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Detect faces
        faces = DeepFace.extract_faces(
            img_path=image_path,
            target_size=(224, 224),
            detector_backend='opencv'
        )

        # Draw rectangles around faces
        for face in faces:
            facial_area = face['facial_area']
            x = facial_area['x']
            y = facial_area['y']
            w = facial_area['w']
            h = facial_area['h']
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Display image
        plt.figure(figsize=(12, 8))
        plt.imshow(img)
        plt.axis('off')
        plt.show()

        return len(faces)

    def process_image(self, image_path: str) -> Tuple[List[np.ndarray], Dict]:
        """
        Process image and extract face embeddings
        """
        try:
            # Extract faces and embeddings
            embeddings = DeepFace.represent(
                img_path=image_path,
                model_name=self.model_name,
                detector_backend='opencv'
            )

            # If single embedding is returned, convert to list
            if isinstance(embeddings, dict):
                embeddings = [embeddings]

            # Extract embedding vectors
            embedding_vectors = [emb['embedding'] for emb in embeddings]

            # Create metadata
            metadata = {
                "original_image": image_path,
                "timestamp": datetime.now().isoformat(),
                "num_faces": len(embedding_vectors),
                "model_name": self.model_name
            }

            return embedding_vectors, metadata

        except Exception as e:
            print(f"Error processing image {image_path}: {str(e)}")
            return [], {}

    def store_embeddings(
        self,
        embeddings: List[np.ndarray],
        metadata: Dict,
        event_id: str = None
    ) -> List[str]:
        """
        Store face embeddings in Pinecone
        """
        vectors = []
        ids = []

        # Process each face embedding
        for idx, embedding in enumerate(embeddings):
            # Generate unique ID
            face_id = str(uuid.uuid4())
            ids.append(face_id)

            # Prepare metadata
            face_metadata = {
                **metadata,
                "face_index": idx,
                "event_id": event_id
            }

            # Convert embedding to list if necessary
            embedding_list = embedding.tolist() if isinstance(embedding, np.ndarray) else embedding

            # Prepare vector
            vectors.append({
                "id": face_id,
                "values": embedding_list,
                "metadata": face_metadata
            })

        # Upsert vectors in batches
        batch_size = 100
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i:i + batch_size]
            self.index.upsert(vectors=batch)

        return ids

**upload_and_process_images**

In [4]:
def upload_and_process_images():
    """Upload images through Colab interface"""
    print("Please upload your images:")
    uploaded = files.upload()

    # Create directory if it doesn't exist
    if not os.path.exists('uploaded_images'):
        os.makedirs('uploaded_images')

    # Save uploaded files
    for filename, content in uploaded.items():
        with open(os.path.join('uploaded_images', filename), 'wb') as f:
            f.write(content)

    return list(uploaded.keys())

# **Main**

# **Clean/Working Code**

In [25]:
# Step 1: Install necessary libraries
!pip install keras_facenet opencv-python-headless
!pip install opencv-python scikit-learn keras_facenet matplotlib
!pip install tk  # for file dialog


Collecting tk
  Downloading tk-0.1.0-py3-none-any.whl.metadata (693 bytes)
Downloading tk-0.1.0-py3-none-any.whl (3.9 kB)
Installing collected packages: tk
Successfully installed tk-0.1.0


test code


In [26]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from keras_facenet import FaceNet
import matplotlib.pyplot as plt
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# Initialize FaceNet model
embedder = FaceNet()

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found at path: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (160, 160))
    return img

# Function to let the user select an image file
def select_image():
    # Hide the Tkinter root window
    Tk().withdraw()
    # Open a file dialog and return the selected file path
    file_path = askopenfilename(title="Select Image", filetypes=[("Image Files", "*.jpg;*.jpeg;*.png")])
    if not file_path:
        print("No file selected")
    return file_path

# Select an image interactively
search_img_path = select_image()  # Open file dialog to select image

if search_img_path:  # Proceed only if a file is selected
    try:
        # Load and preprocess the selected image
        search_img = load_and_preprocess_image(search_img_path)
        search_embedding = np.array(embedder.embeddings([search_img]), dtype=np.float64)[0]

        # Step 2: Extract features using FaceNet
        # List of image file paths (Ensure paths are correct)
        images = [
            '/content/sample_data/19.jpg',
            '/content/sample_data/33.jpg'  # Add more paths as needed
        ]

        # Load and preprocess images
        preprocessed_images = [load_and_preprocess_image(img_path) for img_path in images]

        # Step 3: Extract features for all images
        features = np.array(embedder.embeddings(preprocessed_images), dtype=np.float64)

        # Perform K-means clustering
        num_clusters = min(5, len(features))  # Set to min(5, number of images) to avoid cluster errors
        kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(features)

        # Step 4: To search for a specific face
        # Predict the cluster for the search image
        search_cluster = kmeans.predict([search_embedding])[0]

        # Retrieve all images in the same cluster
        cluster_images = [images[i] for i in range(len(images)) if kmeans.labels_[i] == search_cluster]

        # Function to display images in a cluster
        def display_images(image_paths, title):
            plt.figure(figsize=(10, 10))
            for i, img_path in enumerate(image_paths):
                plt.subplot(1, len(image_paths), i + 1)
                img = load_and_preprocess_image(img_path)
                plt.imshow(img)
                plt.axis('off')
            plt.suptitle(title)
            plt.show()

        # Display the search result cluster
        display_images(cluster_images, f'Cluster for Search Image: {search_img_path}')

    except FileNotFoundError as e:
        print(e)


TclError: no display name and no $DISPLAY environment variable

In [None]:
# Step 1: Import libraries
import cv2
import numpy as np
from sklearn.cluster import KMeans
from keras_facenet import FaceNet
import matplotlib.pyplot as plt

# Initialize FaceNet model
embedder = FaceNet()

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found at path: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (160, 160))
    return img

# List of image file paths (Ensure paths are correct)
images = [
    '/content/sample_data/19.jpg',
    '/content/sample_data/33.jpg'    # Add more paths as needed
]

# Load and preprocess images
preprocessed_images = [load_and_preprocess_image(img_path) for img_path in images]

# Step 2: Extract features using FaceNet
# Convert features to float64 for compatibility with KMeans
features = np.array(embedder.embeddings(preprocessed_images), dtype=np.float64)

# Step 3: Perform K-means clustering
num_clusters = min(5, len(features))  # Set to min(5, number of images) to avoid cluster errors
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(features)

# Function to display images in a cluster
def display_images(image_paths, title):
    plt.figure(figsize=(10, 10))
    for i, img_path in enumerate(image_paths):
        plt.subplot(1, len(image_paths), i + 1)
        img = load_and_preprocess_image(img_path)
        plt.imshow(img)
        plt.axis('off')
    plt.suptitle(title)
    plt.show()


# Step 4: To search for a specific face
search_img_path = '/content/sample_data/4.jpg'  # Update with the path of the search image
try:
    search_img = load_and_preprocess_image(search_img_path)
    search_embedding = np.array(embedder.embeddings([search_img]), dtype=np.float64)[0]

    # Predict the cluster for the search image
    search_cluster = kmeans.predict([search_embedding])[0]

    # Retrieve all images in the same cluster
    cluster_images = [images[i] for i in range(len(images)) if kmeans.labels_[i] == search_cluster]

    # Display the search result cluster
    display_images(cluster_images, f'Cluster for Search Image: {search_img_path}')

except FileNotFoundError as e:
    print(e)


**Clean Code Above**

In [14]:
def main():
    # Get Pinecone credentials
    PINECONE_API_KEY = input("Enter your Pinecone API key: ")
    PINECONE_ENV = input("Enter your Pinecone environment: ")
    INDEX_NAME = "face-embeddings"

    # Choose face recognition model
    MODEL_NAME = "VGG-Face"  # Default model

    # Initialize system
    system = FaceEmbeddingSystem(
        pinecone_api_key=PINECONE_API_KEY,
        pinecone_environment=PINECONE_ENV,
        index_name=INDEX_NAME,
        model_name=MODEL_NAME
    )

    # Upload and process images
    print("\nUpload your images:")
    uploaded_files = upload_and_process_images()

    # Get event ID (optional)
    event_id = input("\nEnter an event ID (or press Enter to skip): ")
    if not event_id:
        event_id = None

    # Process each image
    results = {}
    for filename in uploaded_files:
        image_path = os.path.join('uploaded_images', filename)
        try:
            print(f"\nProcessing {filename}:")

            # Visualize faces
            num_faces = system.visualize_faces(image_path)
            print(f"Found {num_faces} faces")

            # Get embeddings and store them
            embeddings, metadata = system.process_image(image_path)
            if embeddings:
                face_ids = system.store_embeddings(embeddings, metadata, event_id)
                results[filename] = face_ids
                print(f"Successfully stored {len(face_ids)} face embeddings")
            else:
                results[filename] = []
                print("No faces processed in this image")

        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")
            results[filename] = []

    # Print final results
    print("\nProcessing Summary:")
    for filename, face_ids in results.items():
        print(f"\nImage: {filename}")
        print(f"Face IDs: {face_ids}")

if __name__ == "__main__":
    main()

Enter your Pinecone API key: pcsk_2agnRW_PJGcuYNduPh4LXPttu1bnT31UninNBGbyZqKDUiCDAxdY7asgDJkLHmR3KCDJzP
Enter your Pinecone environment: us-west-2


TypeError: FaceEmbeddingSystem.__init__() got an unexpected keyword argument 'pinecone_api_key'