In [3]:
from mtcnn import MTCNN
import cv2
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
from keras_facenet import FaceNet
import warnings

warnings.filterwarnings("ignore")

In [14]:
model = FaceNet()
detector = MTCNN()




In [5]:
def apply_augmentations(image_array):
    image = image_array.copy()

    # Rotation (adjust angle as needed)
    rows, cols = image.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 20, 1)
    image = cv2.warpAffine(image, M, (cols, rows))

    # Horizontal flip with 50% probability
    if np.random.rand() > 0.5:
        image = cv2.flip(image, 1)

    # Add more transformations here if desired

    return image

In [6]:
def extract_face(filename=None, image_pixels=None, required_size=(160, 160)):
    if filename is not None:
        image = Image.open(filename)
        image = image.convert("RGB")
        pixels = np.asarray(image)
    elif image_pixels is not None:
        pixels = image_pixels
    detector = detector
    results = detector.detect_faces(pixels)
    x1, y1, width, height = results[0]["box"]
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    face = pixels[y1:y2, x1:x2]
    box_dimensions = (x1, y1, width, height)
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = np.asarray(image)
    return face_array, box_dimensions

In [7]:
def load_faces(directory, required_size=(160, 160)):
    faces = []
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        image = Image.open(path).convert("RGB")
        image_array = np.asarray(image)

        face_array, _ = extract_face(image_array)
        augmented_image = apply_augmentations(face_array)
        augmented_image = cv2.resize(augmented_image, required_size)
        faces.append(augmented_image)

    return faces

In [8]:
# load a dataset that contains one subdir for each class that in turn contains images
def load_dataset(directory):
    X, y = [], []
    for subdir in os.listdir(directory):
        path = os.path.join(directory, subdir)
        if not os.path.isdir(path):
            continue
        faces = load_faces(path)
        labels = [subdir for _ in range(len(faces))]
        print(f"Loaded {len(faces)} examples for class: {subdir}")
        X.extend(faces)
        y.extend(labels)
    return np.asarray(X), np.asarray(y)

In [16]:
from keras_facenet import FaceNet


def calculate_embeddings(model, faces, batch_size=32):
    num_faces = len(faces)
    embeddings = []

    for start_idx in range(0, num_faces, batch_size):
        end_idx = min(start_idx + batch_size, num_faces)
        batch = faces[start_idx:end_idx]
        batch_embeddings = model.embeddings(batch)
        embeddings.extend(batch_embeddings)

    return np.array(embeddings)

In [17]:
# Training Set
train_dataset_path = r"M:/14-celebrity-faces-dataset/data/train"
train_faces, train_labels = load_dataset(train_dataset_path)
train_embeddings = calculate_embeddings(model, train_faces)
# store_embeddings(train_embeddings, train_labels, "train_embeddings.h5")

# Testing Set
test_dataset_path = r"M:/14-celebrity-faces-dataset/data/val"
test_faces, test_labels = load_dataset(test_dataset_path)
test_embeddings = calculate_embeddings(model, test_faces)
# store_embeddings(test_embeddings, test_labels, "test_embeddings.h5")

AttributeError: 'numpy.ndarray' object has no attribute 'read'

In [18]:
import h5py


def store_embeddings(embeddings, labels, filename):
    with h5py.File(filename, "w") as f:
        f.create_dataset("embeddings", data=embeddings, compression="gzip")
        # Convert labels to bytes before storing
        labels_as_bytes = [label.encode("utf-8") for label in labels]
        f.create_dataset("labels", data=labels_as_bytes, compression="gzip")

In [19]:
store_embeddings(train_embeddings, train_labels, "train_embeddings.h5")
store_embeddings(test_embeddings, test_labels, "test_embeddings.h5")

NameError: name 'train_embeddings' is not defined