# POVa - Facial recognition from RGB-Depth images (identification)

Our goal is to get some face recognition working using RGB-D data (e.g. Kinect).

- Detect faces in images using an existing detector. Good choices are OpenCV, Dlib or MTCNN https://github.com/DCGM/mtcnn.
- Align the face based on detected facial features (map to an average face).
- Optional: try to align the 3D face pose using the depth data.
- Train a neural network to identify faces.

In [1]:
import os
import random

# Kaggle identifier of the dataset; it doubles as the sub-folder name under ../data.
dataset_name = "atulanandjha/lfwpeople"
DATA_PATH = f"../data/{dataset_name}"

Run the following cells only if the dataset is not already present in your project.
________

In [None]:
import kagglehub

# Download the dataset through kagglehub and show the location it reports.
path = kagglehub.dataset_download(dataset_name)
print(f"\n{path}")

In [None]:
# Create the project data folder (-p: no error if it already exists) and move
# the downloaded files into it. `path` comes from the kagglehub download cell,
# so that cell must have been run in this kernel session.
%mkdir -p $DATA_PATH
%mv $path/* $DATA_PATH

In [None]:
# Unpack the .tgz archive found in DATA_PATH into that same folder
# (-v prints every extracted file name).
!tar -xvzf $DATA_PATH/*.tgz -C $DATA_PATH

In [None]:
import shutil

# Build an 80/20 train/test split of the funneled LFW images.
# Only people with at least MIN_IMAGES_PER_PERSON pictures are kept. Training
# images go to ../data/dataset/training/<person>/, test images are copied flat
# into ../data/dataset/test.
MIN_IMAGES_PER_PERSON = 5
TRAIN_FRACTION = 0.8
SPLIT_SEED = 42

lfw_root = "../data/" + dataset_name + "/lfw_funneled"
training_root = "../data/dataset/training"
test_root = "../data/dataset/test"

if not os.path.isdir(lfw_root):
    raise FileNotFoundError(
        f"LFW images not found in {lfw_root}; run the download/extract cells first."
    )

# exist_ok keeps the cell re-runnable, also when only part of the tree exists.
os.makedirs(training_root, exist_ok=True)
os.makedirs(test_root, exist_ok=True)

# A private, seeded generator combined with sorted listings reproduces the
# same split on every run, so re-running the cell can never copy one file into
# both the training and the test folder. The global `random` state is untouched.
split_rng = random.Random(SPLIT_SEED)

for person in sorted(os.listdir(lfw_root)):
    person_dir = os.path.join(lfw_root, person)
    if not os.path.isdir(person_dir):
        continue

    image_names = sorted(
        name
        for name in os.listdir(person_dir)
        if os.path.isfile(os.path.join(person_dir, name))
    )
    if len(image_names) < MIN_IMAGES_PER_PERSON:
        continue

    split_rng.shuffle(image_names)
    split = int(TRAIN_FRACTION * len(image_names))

    person_training_dir = os.path.join(training_root, person)
    os.makedirs(person_training_dir, exist_ok=True)

    for image_name in image_names[:split]:
        shutil.copy(os.path.join(person_dir, image_name), person_training_dir)

    for image_name in image_names[split:]:
        shutil.copy(os.path.join(person_dir, image_name), test_root)

_____________________

## LFW - People (Face Recognition) Dataset

https://vis-www.cs.umass.edu/lfw/

In [None]:
# Install the face detector (mtcnn) and the deep-learning packages used by the
# CNN section into the kernel's environment. Versions are not pinned.
%pip install mtcnn tensorflow keras

In [None]:
from mtcnn import MTCNN
import cv2
import numpy as np

train_dataset_path = "../data/dataset/training/"
test_dataset_path = "../data/dataset/test/"

# One shared MTCNN detector instance, reused by the cells below.
detector = MTCNN()

# Every image file found anywhere below the training folder.
image_files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk(train_dataset_path)
    for name in names
    if name.endswith((".jpg", ".jpeg", ".png"))
]

In [6]:
import matplotlib.pyplot as plt

# Preview: a 2x5 collage of random training images with the detected face
# boxes drawn on top.
rows, cols = 2, 5
image_size = (200, 200)  # (width, height) of one collage tile
canvas = np.zeros((rows * image_size[1], cols * image_size[0], 3), dtype=np.uint8)

# Never ask for more images than exist (random.sample raises otherwise).
random_images = random.sample(image_files, min(rows * cols, len(image_files)))

for idx, image_path in enumerate(random_images):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not load image: {image_path}")
        continue

    # cv2.imread returns BGR pixels while MTCNN expects RGB input.
    faces = detector.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    for face in faces:
        box_x, box_y, box_w, box_h = (int(value) for value in face['box'])
        cv2.rectangle(image, (box_x, box_y), (box_x + box_w, box_y + box_h), (255, 0, 0), 2)

    # Tile position is derived from the sample index; tiles of images that
    # failed to load simply stay black.
    row, col = divmod(idx, cols)
    y_start = row * image_size[1]
    x_start = col * image_size[0]
    canvas[y_start:y_start + image_size[1], x_start:x_start + image_size[0], :] = cv2.resize(image, image_size)

# Render inline instead of cv2.imshow/cv2.waitKey, which open a native window,
# block the kernel until a key is pressed and fail on machines without a display.
plt.figure(figsize=(12, 5))
plt.imshow(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
plt.title("Detected Faces Collage")
plt.axis("off")
plt.show()

Display the number of different classes:

In [None]:
# Count only the directories directly below lfw_funneled (first os.walk entry);
# a missing folder yields 0.
_, top_level_dirs, _ = next(os.walk(DATA_PATH + "/lfw_funneled/"), (None, [], []))
n_labels = len(top_level_dirs)

print("Number of classes/directories in the dataset: ", n_labels)

________________________

## Face Detection

In [8]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

# Load LFW through scikit-learn, keeping only people with at least 20 images.
lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=1.0)
X = lfw_people.images
y = lfw_people.target
target_names = lfw_people.target_names

# NOTE: `images` is documented as (n_samples, height, width), so `W` receives
# the image height and `H` the width. The names are kept because later cells
# pass them to baseline() in this same order.
n_samples, W, H = lfw_people.images.shape
# Features = pixels per image, i.e. the columns of the flattened data matrix.
# X.shape[1] would only be the first image dimension.
n_features = lfw_people.data.shape[1]
n_classes = target_names.shape[0]
n_labels = n_classes

# Stratified 80/20 split with a fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [None]:
# Show the raw arrays and their shapes, one labelled line per item.
for label, value in (
    ("X:", X),
    ("\ny:", y),
    ("\nX shape:", X.shape),
    ("\ny shape:", y.shape),
):
    print(label, value)

In [None]:
# Summary of the dataset dimensions computed in the loading cell.
print("Total dataset size:")
for label, count in (
    ("n_samples", n_samples),
    ("n_features", n_features),
    ("n_classes", n_classes),
):
    print(f"{label}: {count:d}")

In [None]:
import matplotlib.pyplot as plt

# Show the first 24 faces in a 4x6 grid.
# plt.subplots creates its own figure; the former plt.figure(1) call only
# produced an additional empty figure in the output.
figs, axes = plt.subplots(4, 6)
for position, ax in enumerate(axes.flat):  # row-major, same order as i*6+j
    ax.imshow(X[position], cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

In [12]:
from tensorflow.keras.utils import to_categorical

# Normalization: bring the pixels to float32 in [0, 1].
# The division is applied only while values above 1 are present, which makes
# the cell safe to re-run and avoids scaling twice if fetch_lfw_people already
# returned values in [0, 1] (appears to depend on the scikit-learn version - verify).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if max(x_train.max(), x_test.max()) > 1.0:
    x_train = x_train / 255.0
    x_test = x_test / 255.0

# Categorical (one-hot) vectors. Labels that are already 2-D have been encoded
# by a previous run of this cell and are left untouched.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, n_labels)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, n_labels)

## Facial Feature Alignment

In [13]:
def draw_guide_lines(image, keypoints, length=200):
    """Draw the eye axis and its perpendicular on ``image`` and report the tilt.

    Args:
        image: Image that ``cv2.line`` draws on directly (pass a copy to keep
            the original untouched).
        keypoints: Mapping with ``'left_eye'`` and ``'right_eye'`` entries,
            each an ``(x, y)`` pair in the pixel coordinates of ``image``.
        length: Half-length, in pixels, of each guide line measured from the
            midpoint between the eyes. Defaults to 200.

    Returns:
        Tuple ``(image, angle_degrees)``: the image that was passed in and the
        angle of the left-eye -> right-eye line in degrees (``arctan2(dy, dx)``).
    """
    left_eye = keypoints['left_eye']
    right_eye = keypoints['right_eye']

    # Midpoint between the eyes: both guide lines pass through it.
    center_x = int((left_eye[0] + right_eye[0]) / 2)
    center_y = int((left_eye[1] + right_eye[1]) / 2)

    angle = np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])

    def _segment(theta):
        """Return the two end points of the guide line with direction ``theta``."""
        offset_x = length * np.cos(theta)
        offset_y = length * np.sin(theta)
        # Round instead of truncating: cos(pi / 2) is ~6e-17 rather than 0, and
        # int() would turn e.g. 19.99999999999999 into 19 (one pixel off).
        start = (int(round(center_x - offset_x)), int(round(center_y - offset_y)))
        end = (int(round(center_x + offset_x)), int(round(center_y + offset_y)))
        return start, end

    # Line through both eyes.
    cv2.line(image, *_segment(angle), (0, 255, 0), 2)
    # Perpendicular line: eye axis rotated by 90 degrees (pi / 2 radians).
    cv2.line(image, *_segment(angle + np.pi / 2), (255, 0, 0), 2)

    return image, np.degrees(angle)

In [14]:
def align_face(image, keypoints, output_size=(128, 128)):
    """Rotate ``image`` so the line through both eyes becomes horizontal.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is read here.
        keypoints: Mapping with ``'left_eye'`` and ``'right_eye'`` entries,
            each an ``(x, y)`` pair. The rotation centre is computed from
            them, so they must be expressed in the pixel coordinates of
            ``image`` (for a cropped face, subtract the crop origin first).
        output_size: ``(width, height)`` the rotated image is resized to.
            Pass ``None`` to keep the size of ``image``.

    Returns:
        The rotated image, resized to ``output_size`` unless that is ``None``.
    """
    left_eye = keypoints['left_eye']
    right_eye = keypoints['right_eye']

    # Rotation centre: the point halfway between the eyes.
    eye_center = (
        int((left_eye[0] + right_eye[0]) / 2),
        int((left_eye[1] + right_eye[1]) / 2),
    )

    # Tilt of the eye line in degrees.
    angle = np.degrees(
        np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])
    )

    rotation_matrix = cv2.getRotationMatrix2D(eye_center, angle, 1.0)
    height, width = image.shape[:2]
    aligned_image = cv2.warpAffine(
        image, rotation_matrix, (width, height), flags=cv2.INTER_CUBIC
    )

    # `output_size` used to be accepted but ignored, so faces came out in
    # whatever size the input happened to have.
    if output_size is not None:
        aligned_image = cv2.resize(
            aligned_image, tuple(output_size), interpolation=cv2.INTER_CUBIC
        )

    return aligned_image

In [None]:
# For three random training images show: the image with guide lines and face
# box, the cropped face, and the aligned face.
figure, axes = plt.subplots(3, 3, figsize=(6, 6))

random_images = random.sample(image_files, min(3, len(image_files)))
for idx, image_path in enumerate(random_images):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not load image: {image_path}")
        continue

    # cv2.imread returns BGR pixels while MTCNN expects RGB input.
    faces = detector.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if len(faces) == 0:
        print(f"No faces detected in image: {image_path}")
        continue

    face = faces[0]
    keypoints = face['keypoints']

    # Clamp the box to the image: a negative origin would otherwise be read
    # as an index from the end and give a wrong or empty crop.
    image_height, image_width = image.shape[:2]
    box_x, box_y, box_w, box_h = (int(value) for value in face['box'])
    left, top = max(box_x, 0), max(box_y, 0)
    right, bottom = min(box_x + box_w, image_width), min(box_y + box_h, image_height)
    if right <= left or bottom <= top:
        print(f"Face box lies outside the image: {image_path}")
        continue

    image_with_lines, angle = draw_guide_lines(image.copy(), keypoints)
    # Draw the box on the annotated copy only, so the crop below stays clean.
    cv2.rectangle(image_with_lines, (left, top), (right, bottom), (255, 0, 0), 2)

    face_region = image[top:bottom, left:right]
    # Keypoints are given in full-image coordinates; shift them into the crop
    # so the rotation is centred between the eyes of the cropped face.
    crop_keypoints = {
        name: (int(point[0]) - left, int(point[1]) - top)
        for name, point in keypoints.items()
    }
    aligned_face = align_face(face_region, crop_keypoints)

    axes[idx, 0].imshow(cv2.cvtColor(image_with_lines, cv2.COLOR_BGR2RGB))
    axes[idx, 0].set_title(f"Original Image (angle: {angle:.2f}°)")
    axes[idx, 1].imshow(cv2.cvtColor(face_region, cv2.COLOR_BGR2RGB))
    axes[idx, 1].set_title("Original Face")
    axes[idx, 2].imshow(cv2.cvtColor(aligned_face, cv2.COLOR_BGR2RGB))
    axes[idx, 2].set_title("Aligned Face")

for ax in axes.flat:
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

In [None]:
# SIFT demo on the first training image (25 is passed as SIFT_create's first argument).
sift = cv2.SIFT_create(25)
img = x_train[0]

# Min-max rescale to [0, 1], then convert to 8-bit for OpenCV.
img_min, img_max = img.min(), img.max()
normalized_img = (img - img_min) / (img_max - img_min)
scaled_img = (normalized_img * 255).astype('uint8')

# detectAndCompute returns (keypoints, descriptors); only the keypoints are drawn.
key_desc = sift.detectAndCompute(scaled_img, None)
image = cv2.drawKeypoints(scaled_img, key_desc[0], None)

figure, ax = plt.subplots(1, 2, figsize=(6, 6))
original_panel, sift_panel = ax
original_panel.imshow(img, cmap='gray')
original_panel.set_title("Original Image")
sift_panel.imshow(image)
sift_panel.set_title("SIFT Keypoints")
plt.show()

In [None]:
# Facial keypoints and face boxes for ten random training images (2x5 grid).
image_files = [
    os.path.join(root, file)
    for root, _, files in os.walk(train_dataset_path)
    for file in files
    if file.endswith((".jpg", ".jpeg", ".png"))
]

n_rows, n_cols = 2, 5
# Never ask for more images than exist (random.sample raises otherwise).
random_images = random.sample(image_files, min(n_rows * n_cols, len(image_files)))

figure, axes = plt.subplots(n_rows, n_cols, figsize=(10, 8))
figure.suptitle("Facial Keypoints Detection")

for idx, image_path in enumerate(random_images):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not load image: {image_path}")
        continue

    # cv2.imread returns BGR pixels while MTCNN expects RGB input.
    faces = detector.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if len(faces) == 0:
        print(f"No faces detected in the image {idx}")
        continue

    for face in faces:
        for point in face['keypoints'].values():
            # cv2.circle needs plain integer coordinates.
            cv2.circle(image, (int(point[0]), int(point[1])), 2, (0, 255, 255), -1)

        box_x, box_y, box_w, box_h = (int(value) for value in face['box'])
        cv2.rectangle(image, (box_x, box_y), (box_x + box_w, box_y + box_h), (255, 0, 0), 2)

    # Derive the grid cell from the index itself. A running row counter was
    # skipped whenever the first image of the second row hit `continue`, so
    # the second row was then drawn over the first.
    row, col = divmod(idx, n_cols)
    axes[row, col].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[row, col].set_title(f"{len(faces)} face(s)")

# Hide the ticks on every panel, including those of skipped images.
for ax in axes.flat:
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

In [18]:
training_aligned_path = "../data/dataset/training_aligned"
test_aligned_path = "../data/dataset/test_aligned"

def create_aligned_dataset(input_path, output_path):
    """Detect, crop and align every face found below ``input_path``.

    Each ``.jpg``/``.jpeg``/``.png`` file under ``input_path`` is read, every
    detected face is cropped, aligned with ``align_face`` and written to
    ``output_path`` as ``<original stem>_aligned_<face index>.jpg``. All
    results are written directly into ``output_path``; the sub-folder layout
    of ``input_path`` is not reproduced.

    Args:
        input_path: Root folder that is walked recursively for images.
        output_path: Existing folder that receives the aligned faces.
    """
    for root, _, files in os.walk(input_path):
        for file in files:
            if not file.endswith((".jpg", ".jpeg", ".png")):
                continue

            image_path = os.path.join(root, file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"Could not load image: {image_path}")
                continue

            image_height, image_width = image.shape[:2]
            # splitext keeps dots inside the name; split('.')[0] cut the name
            # at its first dot.
            stem = os.path.splitext(file)[0]

            # cv2.imread returns BGR pixels while MTCNN expects RGB input.
            faces = detector.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            for idx, face in enumerate(faces):
                # Clamp the box to the image: a negative origin would be read
                # as an index from the end and give a wrong or empty crop.
                box_x, box_y, box_w, box_h = (int(value) for value in face['box'])
                left, top = max(box_x, 0), max(box_y, 0)
                right = min(box_x + box_w, image_width)
                bottom = min(box_y + box_h, image_height)
                if right <= left or bottom <= top:
                    continue

                face_region = image[top:bottom, left:right]

                # Keypoints are given in full-image coordinates; shift them
                # into the crop so the rotation centre lies between the eyes
                # of the cropped face.
                crop_keypoints = {
                    name: (int(point[0]) - left, int(point[1]) - top)
                    for name, point in face['keypoints'].items()
                }
                aligned_face = align_face(face_region, crop_keypoints)

                aligned_image_path = os.path.join(output_path, f"{stem}_aligned_{idx}.jpg")
                cv2.imwrite(aligned_image_path, aligned_face)

In [19]:
# Build each aligned dataset once: a target folder that already exists is
# taken to mean the work was done and is skipped.
for source_dir, aligned_dir in (
    ("../data/dataset/training", training_aligned_path),
    ("../data/dataset/test", test_aligned_path),
):
    if os.path.exists(aligned_dir):
        continue
    os.makedirs(aligned_dir)
    create_aligned_dataset(source_dir, aligned_dir)

## CNN model

In [20]:
# Training hyper-parameters for the baseline CNN:
# mini-batch size, number of epochs, Adam learning rate.
batch_size, epochs, l_rate = 64, 100, 2e-4

In [21]:
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import optimizers
import tensorflow as tf

def baseline(W=32, H=32, nclass=10, nchannel=3,lr=1e-4):
    """Build and compile the baseline CNN classifier.

    Three 3x3 convolution stages (32, 32 and 64 filters) with 2x2 max-pooling
    are followed by dropout, a 1280-unit dense layer and a softmax output.

    Args:
        W: First spatial dimension of the input tensor.
        H: Second spatial dimension of the input tensor.
        nclass: Number of output classes (units of the softmax layer).
        nchannel: Number of input channels.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy loss, Adam,
        accuracy reported under the name ``'acc'``).
    """
    def conv3x3(filters):
        # Shared settings of every convolution in this network.
        return layers.Conv2D(filters, (3, 3), strides=(1, 1),
                             padding='valid',
                             activation='relu')

    inputs = layers.Input(shape=(W, H, nchannel))

    # Stage 1: convolution -> pooling -> batch normalisation.
    features = conv3x3(32)(inputs)
    features = layers.MaxPool2D((2, 2))(features)
    features = layers.BatchNormalization()(features)

    # Stage 2: convolution -> batch normalisation -> pooling
    # (normalisation and pooling are in the opposite order to stage 1).
    features = conv3x3(32)(features)
    features = layers.BatchNormalization()(features)
    features = layers.MaxPool2D((2, 2))(features)

    # Stage 3: convolution -> pooling.
    features = conv3x3(64)(features)
    features = layers.MaxPool2D((2, 2))(features)

    # Classification head.
    features = layers.Flatten()(features)
    features = layers.Dropout(0.4)(features)
    features = layers.Dense(1280, activation='relu')(features)
    predictions = layers.Dense(nclass, activation='softmax')(features)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer=optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    return model

In [22]:
# Checkpoint callback: only the weights are saved, and only when the
# monitored validation accuracy ('val_acc', higher is better) improves.
checkpoint_filepath = './checkpoints/checkpoint.weights.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Instantiate the baseline network for single-channel inputs of size W x H
# with one output unit per class, then print its layer summary.
model_base = baseline(
    W=W,
    H=H,
    nchannel=1,
    nclass=n_labels,
    lr=l_rate,
)
model_base.summary()

In [None]:
# Train silently; 20% of the training data is held out for validation and the
# checkpoint callback stores the best weights.
history = model_base.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    callbacks=[model_checkpoint_callback],
    verbose=False,
)

# Evaluate with the checkpointed weights rather than those of the last epoch.
model_base.load_weights(checkpoint_filepath)
test_loss, test_acc = model_base.evaluate(x_test, y_test)
print('test acc for model_base: ', test_acc)

In [None]:
# Per-epoch curves recorded by model.fit.
acc, val_acc, loss, val_loss = (
    history.history[metric] for metric in ('acc', 'val_acc', 'loss', 'val_loss')
)

# NOTE: this rebinds `epochs` (the epoch count used by the training cell) to a
# range; re-run the hyper-parameter cell before training again.
epochs = range(len(acc))

plt.figure(2)
for curve, line_style, curve_label in (
    (acc, 'r', 'Training acc - baseline'),
    (val_acc, 'm:', 'Validation acc - baseline'),
):
    plt.plot(epochs, curve, line_style, label=curve_label)

plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid()
plt.legend()
plt.show()

In [None]:
# Training and validation loss per epoch.
# The x axis is derived from the loss history itself rather than from
# `epochs`, which is an integer epoch count unless the accuracy-plot cell has
# rebound it to a range.
loss_epochs = range(len(loss))

plt.figure(3)
plt.plot(loss_epochs, loss, 'r',  label='Training loss - baseline')
plt.plot(loss_epochs, val_loss, 'm:', label='Validation loss - baseline')
plt.title( 'Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()