In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics.pairwise import cosine_similarity

# Triplet Loss

For an image $x$, its encoding is denoted as $f(x)$, where $f$ is the function computed by the neural network.

<div style="text-align: center;">
    <img src="images/f_x.png" style="width:400px;height:200px;">
</div>

### Triplet Training:
- Training will use **triplets of images** $(A, P, N)$, where:
    - **A** is the "Anchor" image — a picture of a person.
    - **P** is the "Positive" image — a picture of the same person as the Anchor.
    - **N** is the "Negative" image — a picture of a different person than the Anchor.

- These triplets are selected from the training dataset. Let $(A^{(i)}, P^{(i)}, N^{(i)})$ denote the $i$-th training example.

- You aim to ensure that an image $A^{(i)}$ (Anchor) is **closer** to the Positive $P^{(i)}$ than to the Negative $N^{(i)}$, by at least a margin $\alpha$:

$$
|| f\left(A^{(i)}\right)-f\left(P^{(i)}\right)||_{2}^{2}+\alpha<|| f\left(A^{(i)}\right)-f\left(N^{(i)}\right)||_{2}^{2}
$$

### Triplet Loss:
- We define the following **triplet loss** function:

$$
\mathcal{J} = \sum^{m}_{i=1} \left[ \underbrace{\mid \mid f(A^{(i)}) - f(P^{(i)}) \mid \mid_2^2}_\text{(1)} - \underbrace{\mid \mid f(A^{(i)}) - f(N^{(i)}) \mid \mid_2^2}_\text{(2)} + \alpha \right]_+
\tag{3}
$$

> **Note**: The notation "$[z]_+$" denotes $\max(z, 0)$.

### Key Points:
- Term (1): Squared distance between the **anchor** (A) and the **positive** (P) for a given triplet; you want this to be small.
- Term (2): Squared distance between the **anchor** (A) and the **negative** (N) for a given triplet; you want this to be large
- **$\alpha$**: This is the margin—a manually chosen hyperparameter. Here, $\alpha = 0.2$.

> **Note**: In most implementations, the encoding vectors are rescaled to have an L2 norm equal to 1 (i.e., $\mid \mid f(img) \mid \mid_2 = 1$). You won’t need to handle this here.


In [2]:
def triplet_loss(Y_true, Y_pred, alpha=0.2):
    anchor, positive, negative = Y_pred[0], Y_pred[1], Y_pred[2]

    pos_dist = tf.subtract(anchor, positive)  # (None, n_features)
    pos_dist = tf.square(pos_dist)  # (None, n_features)
    pos_dist = tf.reduce_sum(pos_dist, axis=-1)  # (None, )

    neg_dist = tf.subtract(anchor, negative)  # (None, n_features)
    neg_dist = tf.square(neg_dist)  # (None, n_features)
    neg_dist = tf.reduce_sum(neg_dist, axis=-1)  #  (None, )

    loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)  # (None, )
    loss = tf.maximum(loss, 0)  # (None, )
    loss = tf.reduce_sum(loss)
    return loss

# Pre-trained Model

For the face recognition task, the preferred model is **Inception ResNet v1**. However, since **Inception ResNet v1** is not accessible, I will be using **Inception ResNet v2**. Below are the details of both models:

- `Inception ResNet v1`
    - **Input shape**: (160, 160, 3)
    - **Output**: 128 classes (representing face embeddings)
    - **Purpose**: Face recognition, typically used for extracting face features for identification and verification
    - **Dataset**: Commonly trained on specialized face datasets such as: `VGGFace2`, `MS-Celeb-1M`, `CASIA-WebFace`
    - ---
- `Inception ResNet v2`
    - **Input shape**: (299, 299, 3)
    - **Output**: 1000 classes (representing general object categories)
    - **Purpose**: Generic image classification, commonly used to classify objects like animals, vehicles, and other everyday objects
    - **Dataset**: Typically trained on the `ImageNet` dataset, which includes 1000 different object categories
    - ---
> In summary, if **Inception ResNet v1** becomes accessible, it should be your preferred choice for face recognition tasks, as it delivers superior performance in this domain.

In [3]:
inception_resnet_v2 = keras.applications.InceptionResNetV2()
inception_resnet_v2.summary()




# Face Embedding

In [4]:
def extract_Face_Features(image_path, model='inception_resnet_v2', image_size=(299, 299)):
    img = keras.preprocessing.image.load_img(image_path, target_size=image_size)
    img = np.array(img)
    img = np.around(img / 255.0, decimals=12)
    
    img = np.expand_dims(img, axis=0)

    if model == 'inception_resnet_v1':
        feature_vector = inception_resnet_v1.predict(img)
    elif model == 'inception_resnet_v2':
        feature_vector = inception_resnet_v2.predict(img)
        
    vector_length = np.linalg.norm(feature_vector, ord=2)
    
    return feature_vector / vector_length

In [5]:
base_path = "images/human_faces/"
names = ["danielle", "younes", "tian", "andrew", "kian", "dan", 
         "sebastiano", "bertrand", "kevin", "felix", "benoit", "arnaud"]

database = {
    name: extract_Face_Features(f"{base_path}{name}.jpg") for name in names
}

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 884ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 862ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 823ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 996ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 864ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 810ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 855ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 835ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 815ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 771ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 781ms/step


In [6]:
for i, j in database.items():
    print(i, j.shape)

danielle (1, 1000)
younes (1, 1000)
tian (1, 1000)
andrew (1, 1000)
kian (1, 1000)
dan (1, 1000)
sebastiano (1, 1000)
bertrand (1, 1000)
kevin (1, 1000)
felix (1, 1000)
benoit (1, 1000)
arnaud (1, 1000)


# Face Verification

In [7]:
def verify(image_path, identity, database, model='inception_resnet_v2'):
    features_from_camera = extract_Face_Features(image_path, model=model)
    features_from_database = database[identity]

    dist = np.linalg.norm(features_from_camera - features_from_database, ord=2)
    if dist < 1:  # 0.7 is more appropriate but I have to use 1 since I dont use Inception Resnet v1, just to test, and the result still sucks
        print(f"It's {identity}, welcome home!")
        open_door = True
    else:
        print(f"It's not {identity}, please go away.")
        open_door = False

    return dist, open_door

In [8]:
for i in range(6):
    print(verify(f"images/human_faces/camera_{i}.jpg", "younes", database))
    print('-'*100)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 770ms/step
It's younes, welcome home!
(0.9597599, True)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 768ms/step
It's not younes, please go away.
(1.0582379, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 830ms/step
It's younes, welcome home!
(0.9957221, True)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 892ms/step
It's not younes, please go away.
(1.0482202, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
It's not younes, please go away.
(1.0128

In [9]:
for i in range(6):
    print(verify(f"images/human_faces/camera_{i}.jpg", "kian", database))
    print('-'*100)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 954ms/step
It's not kian, please go away.
(1.4095857, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 885ms/step
It's not kian, please go away.
(1.4093497, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 896ms/step
It's not kian, please go away.
(1.4111071, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 923ms/step
It's not kian, please go away.
(1.4055458, False)
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
It's not kian, please go away.
(1.

# Face Recognition

In [10]:
def identify(image_path, database, model='inception_resnet_v2'):
    features_from_camera = extract_Face_Features(image_path, model=model)

    min_dist = 100
    for name, features in database.items():
        dist = np.linalg.norm(features - features_from_camera, ord=2)
        if dist < min_dist:
            min_dist = dist
            identity = name

    if min_dist > 1:  # 0.7 is more appropriate but I have to use 1 since I dont use Inception Resnet v1, just to test, and the result still sucks
        print("Not in the database.")
    else:
        print(f"It's {identity}, welcome home!")

    return min_dist, identity

In [11]:
for i in range(6):
    print(identify(f"images/human_faces/camera_{i}.jpg", database))
    print('-'*100)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 912ms/step
It's benoit, welcome home!
(0.19594195, 'benoit')
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 808ms/step
It's sebastiano, welcome home!
(0.24829079, 'sebastiano')
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 820ms/step
It's benoit, welcome home!
(0.1038944, 'benoit')
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 955ms/step
It's bertrand, welcome home!
(0.07149996, 'bertrand')
----------------------------------------------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
It's arnaud, welcome ho

In [12]:
for i in database:
    print(i)
    print(identify(f"images/human_faces/{i}.jpg", database))
    print('-'*100)

danielle
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 818ms/step
It's danielle, welcome home!
(0.0, 'danielle')
----------------------------------------------------------------------------------------------------
younes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 836ms/step
It's younes, welcome home!
(0.0, 'younes')
----------------------------------------------------------------------------------------------------
tian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 822ms/step
It's tian, welcome home!
(0.0, 'tian')
----------------------------------------------------------------------------------------------------
andrew
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 948ms/step
It's andrew, welcome home!
(0.0, 'andrew')
----------------------------------------------------------------------------------------------------
kian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
It's kian, welcome home!
(0.0

# References
1. Florian Schroff, Dmitry Kalenichenko, James Philbin (2015). [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/pdf/1503.03832.pdf)

2. Yaniv Taigman, Ming Yang, Marc'Aurelio Ranzato, Lior Wolf (2014). [DeepFace: Closing the gap to human-level performance in face verification](https://research.fb.com/wp-content/uploads/2016/11/deepface-closing-the-gap-to-human-level-performance-in-face-verification.pdf)

3. This implementation also took a lot of inspiration from the official FaceNet github repository: https://github.com/davidsandberg/facenet

4. Further inspiration was found here: https://machinelearningmastery.com/how-to-develop-a-face-recognition-system-using-facenet-in-keras-and-an-svm-classifier/

5. And here: https://github.com/nyoki-mtl/keras-facenet/blob/master/notebook/tf_to_keras.ipynb