# Face Recognition


## 1 - Packages

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Lambda, Flatten, Dense
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
K.set_image_data_format('channels_last')
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf

%matplotlib inline


## 2 - Face Verification


In [6]:
from tensorflow.keras.models import model_from_json

# Rebuild the network architecture from its JSON description.
# The context manager closes the file even if reading it raises.
with open('facenet_keras/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
# Load the saved weights into the reconstructed architecture.
model.load_weights('facenet_keras/model.h5')

In [7]:
# Show the model's input tensors and output tensors, one list per line.
print(model.inputs, model.outputs, sep="\n")

[<tf.Tensor 'input_1_1:0' shape=(None, 160, 160, 3) dtype=float32>]
[<tf.Tensor 'Bottleneck_BatchNorm/batchnorm_1/add_1:0' shape=(None, 128) dtype=float32>]


In [31]:
# Print the layer-by-layer summary of the loaded model.
model.summary()

Model: "inception_resnet_v1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 160, 160, 3) 0                                            
__________________________________________________________________________________________________
Conv2d_1a_3x3 (Conv2D)          (None, 79, 79, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
Conv2d_1a_3x3_BatchNorm (BatchN (None, 79, 79, 32)   96          Conv2d_1a_3x3[0][0]              
__________________________________________________________________________________________________
Conv2d_1a_3x3_Activation (Activ (None, 79, 79, 32)   0           Conv2d_1a_3x3_BatchNorm[0][0]    
________________________________________________________________________________

## 4 - Loading the Pre-trained Model

In [16]:
# Bind the loaded model to FRmodel, the name the cells below pass to the helper functions.
FRmodel = model

## 5 - Applying the Model

### 5.1 - Face Verification

In [17]:
def img_to_encoding(image_path, model):
    """Return the L2-normalised embedding `model` produces for one image file.

    Args:
        image_path: path of the image to encode; it is loaded at 160x160.
        model: object exposing `predict_on_batch`, called on a batch of one.

    Returns:
        The output of `model.predict_on_batch` divided by its L2 norm.
    """
    pil_image = tf.keras.preprocessing.image.load_img(
        image_path, target_size=(160, 160)
    )
    # Divide pixel values by 255 and round to 12 decimal places.
    scaled = np.around(np.array(pil_image) / 255.0, decimals=12)
    # Add a leading batch axis so the single image forms a batch of one.
    batch = np.expand_dims(scaled, axis=0)
    raw_embedding = model.predict_on_batch(batch)
    l2_norm = np.linalg.norm(raw_embedding, ord=2)
    return raw_embedding / l2_norm

In [18]:
# Reference encodings for the known identities, keyed by name.
database = {
    "cumberbatch": img_to_encoding("images/Benedict_Cumberbatch.png", FRmodel),
    "nolan": img_to_encoding("images/christopher_nolan.jpg", FRmodel),
}


In [19]:
# Load both reference pictures at 160x160, the same size img_to_encoding uses.
cumberbatch, nolan = (
    tf.keras.preprocessing.image.load_img(path, target_size=(160, 160))
    for path in (
        "images/Benedict_Cumberbatch.png",
        "images/christopher_nolan.jpg",
    )
)

In [22]:
# Load the query image here so the cell runs on its own (no other visible cell
# defines `sherlock`), then check the shape of the preprocessed array.
sherlock = tf.keras.preprocessing.image.load_img("images/Sherlock-Holmes.jpg", target_size=(160, 160))
np.around(np.array(sherlock) / 255.0, decimals=12).shape

(160, 160, 3)

In [24]:
def verify(image_path, identity, database, model):
    """Check whether the face in the image at `image_path` matches `identity`.

    Args:
        image_path: path of the image to check.
        identity: key into `database` naming the person to compare against.
        database: dict mapping names to encodings, as built with img_to_encoding.
        model: model passed through to img_to_encoding.

    Returns:
        A tuple `(dist, door_open)`: the L2 distance between the image's
        encoding and the stored encoding, and True when that distance is
        below 0.7.

    Raises:
        KeyError: if `identity` is not a key of `database`.
    """
    # Compute the encoding for the image.
    encoding = img_to_encoding(image_path, model)
    # Compute the distance to the stored encoding of the claimed identity.
    dist = np.linalg.norm(encoding - database[identity])

    # door_open is part of the return value, so it must be bound on every path.
    door_open = bool(dist < 0.7)
    if door_open:
        print("It's " + str(identity))
    else:
        print("It's not " + str(identity))

    return dist, door_open

In [None]:
# Check the query image against the stored "cumberbatch" encoding.
verify("images/Sherlock-Holmes.jpg", "cumberbatch", database, FRmodel)

### 5.2 - Face Recognition

In [27]:
def face_recognition(image_path, database, model):
    """Find the database entry whose encoding is closest to the given image.

    Args:
        image_path: path of the image to identify.
        database: dict mapping names to encodings, as built with img_to_encoding.
        model: model passed through to img_to_encoding.

    Returns:
        A tuple `(min_dist, identity)`: the smallest L2 distance found and the
        name it belongs to. The closest name is returned even when the
        distance exceeds 0.7 and "Not in the database." is printed. If no
        entry has a distance below the initial value of 100 (for example when
        `database` is empty), the result is `(100, None)`.
    """
    # Compute the encoding for the image.
    encoding = img_to_encoding(image_path, model)

    # Find the closest encoding.
    min_dist = 100
    # Bound up front so the return below cannot hit an unassigned name.
    identity = None

    # Loop over the database names and encodings.
    for name, db_enc in database.items():
        # L2 distance between the target encoding and this database encoding.
        dist = np.linalg.norm(encoding - db_enc)

        # Keep the closest match seen so far.
        if dist < min_dist:
            min_dist = dist
            identity = name

    if min_dist > 0.7:
        print("Not in the database.")
    else:
        print("it's " + str(identity) + ", the distance is " + str(min_dist))

    return min_dist, identity

In [None]:
# Look up the closest database identity for the query image.
face_recognition("images/Sherlock-Holmes.jpg", database, FRmodel)

### The Triplet Loss

Since we are using a pretrained model, we won't actually need to implement the triplet loss function ourselves. It is described and implemented below for reference.

For an image $x$, its encoding is denoted as $f(x)$, where $f$ is the function computed by the neural network.

Training will use triplets of images $(A, P, N)$:

- A is an "Anchor" image--a picture of a person.
- P is a "Positive" image--a picture of the same person as the Anchor image.
- N is a "Negative" image--a picture of a different person than the Anchor image.

These triplets are picked from the training dataset. $(A^{(i)}, P^{(i)}, N^{(i)})$ is used here to denote the $i$-th training example.

We'd like to make sure that an image $A^{(i)}$ of an individual is closer to the Positive $P^{(i)}$ than to the Negative image $N^{(i)}$ by at least a margin $\alpha$:

$$
|| f\left(A^{(i)}\right)-f\left(P^{(i)}\right)||_{2}^{2}+\alpha<|| f\left(A^{(i)}\right)-f\left(N^{(i)}\right)||_{2}^{2}
$$


We would thus like to minimize the following "triplet cost":

$$\mathcal{J} = \sum^{m}_{i=1} \large[ \small \underbrace{\mid \mid f(A^{(i)}) - f(P^{(i)}) \mid \mid_2^2}_\text{(1)} - \underbrace{\mid \mid f(A^{(i)}) - f(N^{(i)}) \mid \mid_2^2}_\text{(2)} + \alpha \large ] \small_+ \tag{3}$$
Here, the notation "$[z]_+$" is used to denote $\max(z, 0)$.

**Notes**:

- The term (1) is the squared distance between the anchor "A" and the positive "P" for a given triplet; we want this to be small.
- The term (2) is the squared distance between the anchor "A" and the negative "N" for a given triplet; we want this to be relatively large. It has a minus sign preceding it because minimizing the negative of the term is the same as maximizing that term.
- $\alpha$ is called the margin. It's a hyperparameter that you pick manually. We'll use $\alpha = 0.2$.

Most implementations also rescale the encoding vectors to have an L2 norm equal to one (i.e., $\mid \mid f(img)\mid \mid_2 = 1$).


In [None]:
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """Compute the triplet loss summed over all training examples.

    Args:
        y_true: not used in the computation.
        y_pred: indexable whose items 0, 1 and 2 are the anchor, positive and
            negative encodings.
        alpha: margin added to the difference of the two squared distances.

    Returns:
        Scalar tensor: the sum of max(pos_dist - neg_dist + alpha, 0).
    """
    def squared_distance(first, second):
        # Sum of squared element-wise differences along the last axis.
        return tf.reduce_sum(tf.square(tf.subtract(first, second)), axis=-1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    # (anchor-positive distance) - (anchor-negative distance) + margin.
    margin_gap = tf.add(
        tf.subtract(
            squared_distance(anchor, positive),
            squared_distance(anchor, negative),
        ),
        alpha,
    )
    # Clamp each example's value at zero, then sum over the examples.
    hinge = tf.maximum(margin_gap, 0)
    return tf.reduce_sum(hinge)