# Face recognition

## Face verification

- input image
- output whether the input image is that of the claimed person

## Face recognition

- has database of $K$ persons
- get an input image
- output ID if the image is any of the $K$ persons (or not recognized)

## One-shot learning

- learning from one example to recognize the person again
- "similarity" function d(img1, img2) = degree of difference between images

## Siamese network

- run different images on the same network and compare encodings $f(x^{(i)})$
- if $x^{(i)}, x^{(j)}$ are the same person, $||f(x^{(i)}) - f(x^{(j)})||^{2}$ is small
- if $x^{(i)}, x^{(j)}$ are different persons, $||f(x^{(i)}) - f(x^{(j)})||^{2}$ is large

## Triplet loss

- anchor, positive, negative
- want $||f(A)-f(P)||^{2} \le ||f(A)-f(N)||^{2} - \alpha$ (the margin $\alpha > 0$ rules out the trivial solution where all encodings are identical, e.g. all zeros)
- $L(A,P,N) = \max(||f(A)-f(P)||^{2} - ||f(A)-f(N)||^{2} + \alpha, 0)$
- $J = \displaystyle\sum_{i=1}^{m}L(A^{(i)}, P^{(i)}, N^{(i)})$
- during training, if A,P,N are chosen randomly, $d(A,P) + \alpha \le d(A,N)$ is easily satisfied
    - choose triplets that are "hard" to train on $d(A,P) \approx d(A,N)$
    
## Learning the similarity function

- $\hat{y} = \sigma\left(\displaystyle\sum_{k=1}^{128}w_{k}|f(x^{(i)})_{k} - f(x^{(j)})_{k}| + b\right)$

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.models import Model
# from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform
# from keras.engine.topology import Layer
from tensorflow.keras.layers import Layer, InputSpec
from keras import backend as K
from cv_utils import * 
K.set_image_data_format('channels_first')
# import cv2
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf
# import keras.backend.tensorflow_backend as tfback


%matplotlib inline
%load_ext autoreload
%autoreload 2

# Disabled: would make NumPy print arrays in full instead of summarising them.
# NOTE(review): recent NumPy releases reject a NaN threshold; `sys.maxsize` is the
# documented replacement -- confirm before re-enabling.
#np.set_printoptions(threshold=np.nan)
# Report the TensorFlow version and the version of the Keras bundled with it.
print("tf.__version__ is", tf.__version__)
print("tf.keras.__version__ is:", tf.keras.__version__)

### Encoding

- Encodes each input face image into a 128-dimensional vector.
- Input: tensor of shape $(m, n_C, n_H, n_W) = (m, 3, 96, 96)$ 
- Output: matrix of shape $(m, 128)$

In [None]:
def conv2d_bn(x,
              layer=None,
              cv1_out=None,
              cv1_filter=(1, 1),
              cv1_strides=(1, 1),
              cv2_out=None,
              cv2_filter=(3, 3),
              cv2_strides=(1, 1),
              padding=None):
    """
    Apply Conv2D -> BatchNormalization -> ReLU, optionally followed by
    zero-padding and a second Conv2D -> BatchNormalization -> ReLU stage.
    Every layer is built with the channels-first data format.

    Arguments:
    x -- input tensor the first convolution is applied to
    layer -- name prefix for the created layers; must be a string, since it is
             concatenated with '_conv' / '_bn' (the None default raises TypeError)
    cv1_out -- number of filters of the first convolution
    cv1_filter -- kernel size of the first convolution
    cv1_strides -- strides of the first convolution
    cv2_out -- number of filters of the second convolution, or None to skip it
    cv2_filter -- kernel size of the second convolution
    cv2_strides -- strides of the second convolution
    padding -- padding passed to ZeroPadding2D after the first stage, or None

    Returns:
    tensor -- output of the last stage that was applied:
              the first stage only if padding is None (even when cv2_out is set),
              the zero-padded first stage if cv2_out is None,
              otherwise the output of the second stage
    """
    # Layer names: a single-stage block uses '<layer>_conv' / '<layer>_bn';
    # when a second stage is requested the stages are suffixed '1' and '2'.
    num = '' if cv2_out is None else '1'

    # First stage: convolution, batch norm over the channel axis (axis 1), ReLU.
    tensor = Conv2D(cv1_out, cv1_filter, strides=cv1_strides, data_format='channels_first', name=layer+'_conv'+num)(x)
    tensor = BatchNormalization(axis=1, epsilon=0.00001, name=layer+'_bn'+num)(tensor)
    tensor = Activation('relu')(tensor)
    if padding is None:
        # Without padding nothing else is applied, regardless of cv2_out.
        return tensor

    tensor = ZeroPadding2D(padding=padding, data_format='channels_first')(tensor)
    if cv2_out is None:
        return tensor

    # Second stage: same conv / batch norm / ReLU pattern on the padded tensor.
    tensor = Conv2D(cv2_out, cv2_filter, strides=cv2_strides, data_format='channels_first', name=layer+'_conv'+'2')(tensor)
    tensor = BatchNormalization(axis=1, epsilon=0.00001, name=layer+'_bn'+'2')(tensor)
    tensor = Activation('relu')(tensor)
    return tensor

In [None]:
def faceRecoModel(input_shape):
    """
    Implementation of the Inception model used for FaceNet

    The network is built in channels-first layout: batch normalization is
    applied over axis 1, so every stem layer is given
    data_format='channels_first' explicitly instead of relying on the global
    Keras image data format. The output is a 128-unit dense layer whose rows
    are L2-normalized.

    Arguments:
    input_shape -- shape of the images of the dataset, channels first
                   (this notebook uses (3, 96, 96))

    Returns:
    model -- a Model() instance in Keras
    """

    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3), data_format='channels_first')(X_input)

    # First Block
    X = Conv2D(64, (7, 7), strides=(2, 2), data_format='channels_first', name='conv1')(X)
    # NOTE(review): bn1 uses the Keras default epsilon, unlike bn2/bn3 below --
    # confirm this matches the model the pretrained weights come from.
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1), data_format='channels_first')(X)
    X = MaxPooling2D((3, 3), strides=2, data_format='channels_first')(X)

    # Second Block
    X = Conv2D(64, (1, 1), strides=(1, 1), data_format='channels_first', name='conv2')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn2')(X)
    X = Activation('relu')(X)

    # Zero-Padding only (no pooling between the second and third blocks)
    X = ZeroPadding2D((1, 1), data_format='channels_first')(X)

    # Third Block
    X = Conv2D(192, (3, 3), strides=(1, 1), data_format='channels_first', name='conv3')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn3')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1), data_format='channels_first')(X)
    X = MaxPooling2D(pool_size=3, strides=2, data_format='channels_first')(X)

    # The inception_block_* helpers are not defined in this cell
    # (presumably provided by the cv_utils star import -- verify).

    # Inception 1: a/b/c
    X = inception_block_1a(X)
    X = inception_block_1b(X)
    X = inception_block_1c(X)

    # Inception 2: a/b
    X = inception_block_2a(X)
    X = inception_block_2b(X)

    # Inception 3: a/b
    X = inception_block_3a(X)
    X = inception_block_3b(X)

    # Top layer: average-pool, flatten, project to the 128-dimensional encoding
    X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1), data_format='channels_first')(X)
    X = Flatten()(X)
    X = Dense(128, name='dense_layer')(X)

    # L2 normalization of each encoding (axis 1 is the feature axis)
    X = Lambda(lambda  x: K.l2_normalize(x,axis=1))(X)

    # Create model instance
    model = Model(inputs=X_input, outputs=X, name='FaceRecoModel')

    return model

In [None]:
# Build the encoding network for channels-first inputs of shape (3, 96, 96).
FRmodel = faceRecoModel(input_shape=(3, 96, 96))
# Report the model's total parameter count as a quick sanity check of the architecture.
print("Total Params:", FRmodel.count_params())