In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from insightface.app import FaceAnalysis
import math
# Folder holding the images to scan for faces.
folder_path = "AllClassroomImages/"

# InsightFace detection + recognition pipeline using the 'buffalo_l' model pack.
# NOTE(review): ctx_id=0 presumably selects the first GPU device — confirm
# against the deployment environment.
model = FaceAnalysis(name='buffalo_l')
model.prepare(ctx_id=0)

# Parallel lists: index i in each one describes the same detected face.
face_images = []      # RGB crops, kept for display with matplotlib
face_embeddings = []  # embedding vector reported by the model for each face
face_source = []      # filename of the image the face was found in

# Iterate in sorted order: os.listdir returns entries in arbitrary order, and
# the order of faces determines how the clusters are numbered and displayed.
for filename in sorted(os.listdir(folder_path)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    img_path = os.path.join(folder_path, filename)
    img = cv2.imread(img_path)
    if img is None:
        print(f"Failed to load {img_path}")
        continue
    # Run the model on the image exactly as cv2.imread returned it (BGR).
    # InsightFace expects OpenCV-style BGR input; feeding it an RGB array
    # swaps the red/blue channels seen by the detector and the recognizer.
    faces = model.get(img)
    # The RGB copy is only used to cut out crops that matplotlib can show.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = img_rgb.shape
    for face in faces:
        x1, y1, x2, y2 = map(int, face.bbox)
        # Clip the box to the image so the slice below never wraps around
        # through negative indices.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        face_crop = img_rgb[y1:y2, x1:x2]
        if face_crop.size == 0:
            # Box lies entirely outside the image (or is degenerate).
            continue
        face_images.append(face_crop)
        face_embeddings.append(face.embedding)
        face_source.append(filename)
# Stop when nothing was detected: there is nothing to cluster.
if len(face_embeddings) == 0:
    print("No faces detected in the provided images!")
    # Raise SystemExit instead of calling exit(): exit() is the interactive
    # helper installed by the `site` module, and inside a Jupyter kernel it
    # asks the kernel to shut down rather than simply stopping this cell.
    # As a plain script this still terminates with exit status 0.
    raise SystemExit

# Stack the per-face vectors into a 2-D array (one row per face).
face_embeddings = np.array(face_embeddings)

# Density-based clustering on cosine distance: faces closer than eps=0.3 are
# neighbours, and a cluster needs at least 2 samples. Faces that end up in no
# cluster are labelled -1 by DBSCAN.
db = DBSCAN(eps=0.3, min_samples=2, metric='cosine').fit(face_embeddings)
labels = db.labels_
unique_labels = set(labels)
print("Clusters found (label -1 indicates noise):", unique_labels)
# Group the face crops by cluster label, skipping the noise label (-1).
clusters = {}
for label, face_img in zip(labels, face_images):
    if label == -1:
        continue
    clusters.setdefault(label, []).append(face_img)
n_clusters = len(clusters)
if n_clusters == 0:
    print("No clusters found above the threshold.")
else:
    # Create a figure with one row per cluster. squeeze=False always returns
    # a 2-D array of axes, so the single-cluster case needs no special
    # handling; take the only column to get one Axes per cluster.
    fig, axs = plt.subplots(n_clusters, 1, figsize=(15, 5 * n_clusters), squeeze=False)
    axs = axs[:, 0]

    for idx, (label, faces_in_cluster) in enumerate(clusters.items()):
        # Resize faces to a common height (100 px) while maintaining aspect
        # ratio, so they can be stacked side by side.
        resized_faces = []
        for face in faces_in_cluster:
            h, w, _ = face.shape
            new_h = 100
            # A very narrow crop (e.g. a face clipped at the image border)
            # would round down to width 0, which cv2.resize rejects; keep at
            # least one pixel.
            new_w = max(1, int(w * (new_h / h)))
            resized = cv2.resize(face, (new_w, new_h))
            resized_faces.append(resized)
        # Concatenate images horizontally (all share the same height now).
        concatenated = np.hstack(resized_faces)
        axs[idx].imshow(concatenated)
        axs[idx].axis('off')
        axs[idx].set_title(f"Cluster {label} (Count: {len(faces_in_cluster)})")
    plt.tight_layout()
    plt.show()


In [None]:
# Noise cluster: show every face that DBSCAN left unassigned (label -1) as a
# grid with a fixed number of faces per row.
noise_faces = [face for lab, face in zip(labels, face_images) if lab == -1]
if noise_faces:
    # Bring every crop to a common height so a row can be stacked horizontally.
    resized_noise = []
    for face in noise_faces:
        h, w, _ = face.shape
        new_h = 100
        # Keep aspect ratio, but never let a sliver crop round down to width 0
        # (cv2.resize rejects an empty target size).
        new_w = max(1, int(w * (new_h / h)))
        resized_noise.append(cv2.resize(face, (new_w, new_h)))
    num_per_row = 5
    num_rows = math.ceil(len(resized_noise) / num_per_row)
    row_images = []
    max_width = 0
    for i in range(num_rows):
        row_faces = resized_noise[i * num_per_row:(i + 1) * num_per_row]
        row_image = np.hstack(row_faces)
        row_images.append(row_image)
        max_width = max(max_width, row_image.shape[1])  # Track widest row
    # Rows differ in width because the faces do; pad each one on the right
    # with white so they can be stacked vertically.
    padded_rows = []
    for row in row_images:
        pad_width = max_width - row.shape[1]
        padded_row = cv2.copyMakeBorder(row, 0, 0, 0, pad_width, cv2.BORDER_CONSTANT, value=[255, 255, 255])
        padded_rows.append(padded_row)
    final_image = np.vstack(padded_rows)
    plt.figure(figsize=(15, 5 * num_rows))
    # The crops in face_images are already RGB, which is what imshow expects.
    # Running them through a BGR->RGB conversion here would swap red and blue
    # and render the faces with wrong colours.
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Noise Cluster (Label -1) - 5 Images per Row")
    plt.show()
else:
    print("No faces in noise cluster (-1) found.")