In [None]:
# Mount Google Drive at /content/drive; the dataset read later in this notebook
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install deepface opencv-python scikit-learn matplotlib


Collecting deepface
  Downloading deepface-0.0.94-py3-none-any.whl.metadata (35 kB)
Collecting Flask<=2.0.2,>=1.1.2 (from deepface)
  Downloading Flask-2.0.2-py3-none-any.whl.metadata (3.8 kB)
Collecting werkzeug<=2.0.2 (from deepface)
  Downloading Werkzeug-2.0.2-py3-none-any.whl.metadata (4.5 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.14 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collect

In [None]:
import os, glob, shutil
from deepface import DeepFace

# Flatten the per-folder dataset into one working directory of .jpg files.
input_root = "/content/drive/MyDrive/Datasets/testDataset"
flat_images = "/content/images_concat_aligned"
os.makedirs(flat_images, exist_ok=True)

# Names written during this run. Two source folders may contain a file with the
# same name; without this check the second copy would silently replace the first.
copied_names = set()

# Sorted traversal keeps collision handling deterministic between runs.
for folder in sorted(os.listdir(input_root)):
    folder_path = os.path.join(input_root, folder)
    if not os.path.isdir(folder_path):
        continue
    for img in sorted(glob.glob(os.path.join(folder_path, "*.jpg"))):
        target_name = os.path.basename(img)
        if target_name in copied_names:
            # Collision: keep both images by prefixing the source folder name.
            target_name = f"{folder}__{target_name}"
        copied_names.add(target_name)
        shutil.copy(img, os.path.join(flat_images, target_name))

print("✅ Copied all images to:", flat_images)


25-08-03 16:14:20 - Directory /root/.deepface has been created
25-08-03 16:14:20 - Directory /root/.deepface/weights has been created
✅ Copied all images to: /content/images_concat_aligned


In [None]:
import numpy as np

# DeepFace model names whose embeddings are concatenated, in this order, into
# one hybrid vector per image.
models = ["Facenet", "VGG-Face", "SFace", "ArcFace"]
# Parallel accumulators filled by the loop at the end of this cell:
# embeddings[i] is the hybrid vector of the image at paths[i].
embeddings, paths = [], []

def normalize(vec):
    """Scale ``vec`` to unit L2 length.

    A vector whose norm is zero is handed back untouched, which avoids a
    division by zero.
    """
    magnitude = np.linalg.norm(vec)
    if not magnitude:
        return vec
    return vec / magnitude

# Output size of each supported model. A model that fails on an image is padded
# with zeros of exactly this length so every hybrid vector keeps the same layout
# (128 + 4096 + 128 + 512 = 4864, the size this notebook records on success).
_EMBEDDING_DIMS = {"Facenet": 128, "VGG-Face": 4096, "SFace": 128, "ArcFace": 512}


def get_concat_embedding(img_path):
    """Build one hybrid embedding for ``img_path``.

    For every name in the module-level ``models`` list, the first embedding
    returned by ``DeepFace.represent`` (MTCNN detector, detection not enforced)
    is L2-normalised and appended to the result.

    Args:
        img_path: Path of the image to embed.

    Returns:
        1-D ``np.ndarray`` holding the concatenated per-model embeddings. A
        model that raises contributes a zero block of that model's size, so
        vectors from different images always have equal length.
    """
    final_vec = []
    for model in models:
        try:
            emb = DeepFace.represent(
                img_path=img_path,
                model_name=model,
                detector_backend="mtcnn",
                enforce_detection=False,
            )[0]["embedding"]
            final_vec.extend(normalize(emb))
        except Exception as exc:
            # `Exception` (not a bare except) lets KeyboardInterrupt stop a long
            # run, and the failure is reported instead of silently hidden.
            # Models missing from the table keep the previous 128-zero fallback.
            pad = _EMBEDDING_DIMS.get(model, 128)
            print(f"[warn] {model} failed on {img_path}: {exc!r}; padding with {pad} zeros")
            final_vec.extend([0] * pad)
    return np.array(final_vec)

# Embed every flattened image in sorted order, keeping `embeddings` and
# `paths` index-aligned.
image_paths = glob.glob(flat_images + "/*.jpg")
image_paths.sort()

for img_path in image_paths:
    vector = get_concat_embedding(img_path)
    embeddings.append(vector)
    paths.append(img_path)

total_embedded = len(embeddings)
print(f"✅ Generated hybrid embeddings for {total_embedded} images.")


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/facenet_weights.h5
To: /root/.deepface/weights/facenet_weights.h5


25-08-03 16:15:11 - 🔗 facenet_weights.h5 will be downloaded from https://github.com/serengil/deepface_models/releases/download/v1.0/facenet_weights.h5 to /root/.deepface/weights/facenet_weights.h5...


100%|██████████| 92.2M/92.2M [00:00<00:00, 233MB/s]


25-08-03 16:15:16 - 🔗 vgg_face_weights.h5 will be downloaded from https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5 to /root/.deepface/weights/vgg_face_weights.h5...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5
To: /root/.deepface/weights/vgg_face_weights.h5
100%|██████████| 580M/580M [00:07<00:00, 80.7MB/s]


25-08-03 16:15:25 - 🔗 face_recognition_sface_2021dec.onnx will be downloaded from https://github.com/opencv/opencv_zoo/raw/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx to /root/.deepface/weights/face_recognition_sface_2021dec.onnx...


Downloading...
From: https://github.com/opencv/opencv_zoo/raw/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx
To: /root/.deepface/weights/face_recognition_sface_2021dec.onnx
100%|██████████| 38.7M/38.7M [00:00<00:00, 281MB/s]


25-08-03 16:15:30 - 🔗 arcface_weights.h5 will be downloaded from https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 to /root/.deepface/weights/arcface_weights.h5...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5
To: /root/.deepface/weights/arcface_weights.h5
100%|██████████| 137M/137M [00:01<00:00, 120MB/s]


✅ Generated hybrid embeddings for 46 images.


In [None]:
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import cosine_distances

# Pairwise cosine distances between all hybrid embeddings, passed to DBSCAN as
# a precomputed matrix.
distance_matrix = cosine_distances(embeddings)

# min_samples=1: every image is a core point, so each one receives a cluster
# label (none is marked as noise).
db = DBSCAN(metric='precomputed', min_samples=1, eps=0.4)
db.fit(distance_matrix)
labels = db.labels_


In [None]:
# Copy every image into a folder named after its cluster label.
output_root = "/content/grouped_faces_dbscan"

# Start from an empty output tree. Folders left over from an earlier run (with
# different labels) would otherwise be mixed into the new grouping.
shutil.rmtree(output_root, ignore_errors=True)
os.makedirs(output_root, exist_ok=True)

# paths[i] and labels[i] describe the same image.
for src_path, label in zip(paths, labels):
    group_dir = os.path.join(output_root, f"group_{label}")
    os.makedirs(group_dir, exist_ok=True)
    shutil.copy(src_path, group_dir)

print("✅ Grouped faces saved in:", output_root)


✅ Grouped faces saved in: /content/grouped_faces_dbscan


In [None]:
import matplotlib.pyplot as plt
import cv2

def _group_sort_key(name):
    """Sort ``group_<n>`` folder names by numeric label rather than as text.

    Plain string sorting puts ``group_10`` before ``group_2``. Names without a
    trailing integer are placed after the numbered ones, in alphabetical order.
    """
    try:
        return (0, int(name.rsplit("_", 1)[-1]), name)
    except ValueError:
        return (1, 0, name)


# Only directories are groups; stray files in the output root are ignored.
group_folders = sorted(
    (g for g in os.listdir(output_root) if os.path.isdir(os.path.join(output_root, g))),
    key=_group_sort_key,
)

for group in group_folders:
    group_path = os.path.join(output_root, group)
    images = sorted(os.listdir(group_path))
    print(f"\n👥 {group} – {len(images)} image(s)")

    if not images:
        # Nothing to draw, and a zero-width figure is rejected by matplotlib.
        continue

    shown = images[:10]  # preview at most ten images per group
    plt.figure(figsize=(min(15, len(images) * 2), 4))
    for idx, img_file in enumerate(shown):
        img_path = os.path.join(group_path, img_file)
        plt.subplot(1, len(shown), idx + 1)
        img = cv2.imread(img_path)
        if img is not None:
            # OpenCV loads BGR; matplotlib expects RGB.
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # cv2.imread returns None for unreadable files; such a slot stays blank
        # but keeps its title so the problem file is visible.
        plt.title(img_file[:10], fontsize=8)
        plt.axis('off')
    plt.tight_layout()
    plt.show()


Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Source of the reusable module written to HybridEmbedder.py below.
# Docstrings inside the template use ''' because the template itself is
# delimited by triple double quotes.
code = """
from deepface import DeepFace
import numpy as np

DEFAULT_MODELS = ("Facenet", "VGG-Face", "SFace", "ArcFace")

# Output size of each model. A model that fails on an image is zero-padded to
# this length so every hybrid vector keeps the same layout. Models missing
# from the table fall back to 128.
EMBEDDING_DIMS = {"Facenet": 128, "VGG-Face": 4096, "SFace": 128, "ArcFace": 512}


class HybridEmbedder:
    '''Concatenate L2-normalised DeepFace embeddings from several models.'''

    def __init__(self, models=None):
        # None instead of a list literal: a mutable default would be shared
        # by every instance created without arguments.
        self.models = list(DEFAULT_MODELS) if models is None else list(models)

    def normalize(self, vec):
        '''Return vec scaled to unit L2 norm (unchanged when the norm is zero).'''
        norm = np.linalg.norm(vec)
        return vec / norm if norm else vec

    def get_embedding(self, img_path):
        '''Return the concatenated, per-model normalised embedding of img_path.

        A model that raises contributes a zero block of that model's size, so
        the result has the same length for every image.
        '''
        final_vec = []
        for model in self.models:
            try:
                emb = DeepFace.represent(
                    img_path=img_path,
                    model_name=model,
                    detector_backend="mtcnn",
                    enforce_detection=False
                )[0]["embedding"]
                final_vec.extend(self.normalize(emb))
            except Exception as exc:
                pad = EMBEDDING_DIMS.get(model, 128)
                print(f"[warn] {model} failed on {img_path}: {exc!r}; padding with {pad} zeros")
                final_vec.extend([0] * pad)
        return np.array(final_vec)
"""

with open("HybridEmbedder.py", "w", encoding="utf-8") as f:
    f.write(code)

print("✅ HybridEmbedder.py created successfully.")


✅ HybridEmbedder.py created successfully.


In [None]:
from HybridEmbedder import HybridEmbedder

# Smoke test of the generated module: embed one dataset image and report the
# length of the resulting hybrid vector.
sample_image = "/content/drive/MyDrive/Datasets/testDataset/Woody_Allen/Woody_Allen_0001.jpg"

embedder = HybridEmbedder()
embedding = embedder.get_embedding(sample_image)
print("✅ Embedding shape:", embedding.shape)


✅ Embedding shape: (4864,)


In [None]:
# Copy the generated module from the working directory into the mounted
# Google Drive folder.
!cp HybridEmbedder.py /content/drive/MyDrive/


In [2]:
# Mount Google Drive at /content/drive (same call as the notebook's first cell).
# NOTE(review): the execution counter restarts here, so this part was presumably
# run in a separate session — confirm whether the repeated mount is still needed.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Print the installed git version, then set the global user name and e-mail.
# NOTE(review): the e-mail address is hard-coded and is saved with the notebook —
# confirm that publishing it is intended.
!git --version
!git config --global user.name "muffin-123"
!git config --global user.email "rooha9tanveer@gmail.com"


git version 2.34.1


In [9]:
%cd /content
!git clone https://github.com/muffin-123/FaceGrouping_HybridEmbedder.git
%cd FaceGrouping_HybridEmbedder



/content
fatal: destination path 'FaceGrouping_HybridEmbedder' already exists and is not an empty directory.
/content/FaceGrouping_HybridEmbedder


In [7]:
# Copy the notebook file into the current working directory.
# NOTE(review): the recorded run failed with "No such file or directory" for this
# source path — confirm where the .ipynb is actually stored before relying on it.
!cp /content/HybridEmbedder.ipynb .


cp: cannot stat '/content/HybridEmbedder.ipynb': No such file or directory


In [8]:
!git add HybridEmbedder.ipynb
!git commit -m " (DBSCAN + MTCNN + Hybrid Embeddings)"
!git push origin main


fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
