In [9]:
import numpy as np
import cv2
import face_recognition
import os
from tqdm import tqdm
import shutil

In [10]:
import logging
import contextlib
import tensorflow as tf
import warnings
from mtcnn import MTCNN
from tqdm import tqdm

# Suppress TensorFlow logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logs at level 3 (ERROR)
tf.get_logger().setLevel('ERROR')

# Configure logging to suppress logs from TensorFlow and MTCNN
logging.getLogger('tensorflow').setLevel(logging.FATAL)
logging.getLogger('mtcnn').setLevel(logging.ERROR)

# Suppress warnings
warnings.filterwarnings('ignore')

# Function to suppress stdout and stderr
@contextlib.contextmanager
def suppress_output():
    with open(os.devnull, 'w', encoding='utf-8') as fnull:
        with contextlib.redirect_stdout(fnull), contextlib.redirect_stderr(fnull):
            yield

def detect_faces_mtcnn(image):
    """
    Detect faces using MTCNN and return bounding boxes in a format compatible with face_recognition.
    """
    with suppress_output():
        # Initialize the MTCNN detector without printing logs
        detector = MTCNN()

        # Detect faces in the image
        faces = detector.detect_faces(image)

        # Filter faces with confidence higher than 0.70 and extract their bounding boxes
        rectangles = []
        for face in faces:
            if face['confidence'] > 0.50:
                x, y, width, height = face['box']
                # Convert to top, right, bottom, left format
                top, right, bottom, left = y, x + width, y + height, x
                rectangles.append((top, right, bottom, left))
                
        return rectangles

In [11]:

def get_embeddings(rgb_image, face_locations):
    """
    Extract face embeddings from the given image based on face locations.
    Args:
    rgb_image (np.array): RGB image from which to extract embeddings.
    face_locations (list): List of face locations in (top, right, bottom, left) format.
    Returns:
    list: List of face encodings.
    """
    if not face_locations:
        return []
    # Compute face embeddings
    face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
    return face_encodings

def process_images(dirpath):
    """
    Process images in the given directory to extract face embeddings.
    Args:
    dirpath (str): Path to the directory containing images.
    Returns:
    list, list: List of embeddings and list of corresponding image paths.
    """
    embeddings = []
    image_paths = []

    # Iterate over all files in the directory
    for filename in tqdm(os.listdir(dirpath)):
        file_path = os.path.join(dirpath, filename)
        image = cv2.imread(file_path)

        # Check if the image was loaded successfully
        if image is None:
            print(f"Error: Could not load image from {file_path}")
            continue

        # Convert the image from BGR to RGB for face_recognition
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Detect faces and extract face images
        face_locations = detect_faces_mtcnn(rgb_image)

        # Extract embeddings for each detected face
        face_encodings = get_embeddings(rgb_image, face_locations)
    
        for encoding in face_encodings:
            embeddings.append(encoding)
            image_paths.append(file_path)

    return embeddings, image_paths


In [28]:
embeddings[3]

array([-2.19271317e-01,  1.58968139e-02, -1.17288902e-02, -4.92642969e-02,
       -6.46822378e-02, -3.95706296e-02, -6.08498417e-02, -1.22916006e-01,
        1.32741436e-01, -1.30527020e-01,  2.03926116e-01, -2.18902752e-02,
       -1.91356286e-01, -1.17518902e-01,  9.75560397e-04,  1.03699826e-01,
       -1.30550593e-01, -1.79571718e-01, -3.12549174e-02, -9.11375582e-02,
        4.90420051e-02, -8.00062567e-02, -1.26742553e-02,  1.26451433e-01,
       -2.80507535e-01, -3.16164911e-01, -1.23431042e-01, -9.27998275e-02,
        3.99659798e-02, -1.11172020e-01,  6.00379985e-03,  2.05941740e-02,
       -2.44743079e-01, -6.33531213e-02,  1.97250545e-02,  1.84824437e-01,
        6.26464710e-02,  3.50554734e-02,  1.88893035e-01, -1.96172800e-02,
       -1.36762396e-01, -9.26073175e-03,  1.02232352e-01,  2.61798471e-01,
        1.56981736e-01,  7.86026567e-02,  4.58652712e-02,  6.24634326e-04,
        1.13474488e-01, -2.45721877e-01,  6.65472299e-02,  6.62138015e-02,
        1.78884417e-01,  

In [12]:
dir= 'train4'
embeddings,image_paths = process_images(dir)

100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [01:22<00:00,  6.35s/it]


In [34]:
from sklearn.cluster import DBSCAN
eps = 0.5  # The maximum distance between two samples for one to be considered as in the neighborhood of the other
min_samples = 2  # The number of samples (or total weight) in a neighborhood for a point to be considered as a core point
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
cluster_labels = dbscan.fit_predict(embeddings)


In [35]:
# from sklearn.cluster import KMeans

# num_clusters = 5

# # Apply K-means clustering on extracted features
# kmeans = KMeans(n_clusters=num_clusters, random_state=42)
# cluster_labels = kmeans.fit_predict(embeddings)

In [36]:
cluster_labels

array([ 0,  0,  1,  1,  1,  1,  0, -1, -1,  1,  0, -1,  0,  1,  0,  0, -1,
        0,  1,  1], dtype=int64)

In [43]:
base_cluster_dir = f'Clusters_{dir}'

# Iterate over the image paths and cluster labels simultaneously
for i, image_path in enumerate(image_paths):
    cluster_label = cluster_labels[i]
    
    # Determine the destination cluster folder
    cluster_folder = f'{base_cluster_dir}/Cluster_{cluster_label}/'
    
    # Create the cluster directory if it doesn't exist
    os.makedirs(cluster_folder, exist_ok=True)
    
    # Get the filename from the image path
    filename = os.path.basename(image_path)
    
    # Determine the destination path
    destination_path = os.path.join(cluster_folder, filename)
    
    try:
        # Check if the file exists at the source path
        if not os.path.exists(image_path):
            print(f"Error: Source file '{image_path}' not found")
            continue
        
        # Perform the copy operation
        shutil.copyfile(image_path, destination_path)
        print(f"Copied '{filename}' to '{cluster_folder}'")

    except FileExistsError:
        print(f"Error: '{filename}' already exists in '{cluster_folder}'")
    except Exception as e:
        print(f"Unexpected error while copying '{filename}': {str(e)}")

print("\n\nProcessing completed.")

Copied '1615919242148.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1615919242221.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1628522161324.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1628594944197.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1628594944261.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1640669698002.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1640669698002.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1640669698002.jpg' to 'Clusters_train4/Cluster_-1/'
Copied '1640669698002.jpg' to 'Clusters_train4/Cluster_-1/'
Copied '1686134879804.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1686134879804.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1686134879804.jpg' to 'Clusters_train4/Cluster_-1/'
Copied '1686134879824.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1686134879824.jpg' to 'Clusters_train4/Cluster_1/'
Copied '8deVjGlgsFmydcYzXASRyOXUGMV.jpg' to 'Clusters_train4/Cluster_0/'
Copied '8dYyRetSRJzwVLMxhuSAhRKvJst.jpg' to 'Clusters_train4/Cluster_0/'
Copied '8dYyRetSRJzwVLMxh