<a href="https://colab.research.google.com/github/nitisha-pradhan/The_Irregulars/blob/main/FaceNetWeights.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2

def get_face(img):
    '''Crop an image to the first detected frontal face plus a padding border.

    Args:
        img (numpy.ndarray): Image array whose first two dimensions are
            (height, width); a trailing channel dimension is optional.

    Returns:
        numpy.ndarray: Slice of ``img`` covering the first detected face box,
            enlarged by 1/2.5 of the box size on every side and clipped to
            the image bounds.

    Raises:
        ValueError: If ``img`` is None or the cascade detects no face.
    '''
    if img is None:
        raise ValueError('get_face() received None instead of an image; '
                         'check that the image file was read successfully.')

    # Only height and width are needed, so do not assume a channel axis exists.
    height, width = img.shape[:2]

    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    face_boxes = face_cascade.detectMultiScale(img)

    # detectMultiScale yields an empty result when nothing is found; use len()
    # because the truth value of a multi-element numpy array is ambiguous.
    if len(face_boxes) == 0:
        raise ValueError('No face detected in the image.')

    # Get dimensions of the first bounding box as plain Python ints.
    x, y, w, h = (int(value) for value in face_boxes[0])

    # Calculate padding as the detected box is too tight around the face.
    pad_w = int(w / 2.5)
    pad_h = int(h / 2.5)

    # Co-ordinates of the crop, clipped so they never leave the image.
    x1 = max(0, x - pad_w)
    y1 = max(0, y - pad_h)
    x2 = min(width, x + w + pad_w)
    y2 = min(height, y + h + pad_h)

    return img[y1:y2, x1:x2]

In [None]:
import numpy as np

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as tfback

from pathlib import Path

from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.core import Lambda, Flatten, Dense

from sklearn.metrics.pairwise import cosine_similarity

# Make channels-first the default image layout of the tensorflow.keras backend;
# layers created without an explicit data_format fall back to this default.
# NOTE(review): this configures tensorflow.keras, while the layers above are
# imported from the `keras` package — confirm both share the same backend config.
tfback.set_image_data_format('channels_first')


def conv2d_bn(x, layer_name, filters, kernel_size=(1, 1), strides=(1, 1), i='', epsilon=0.00001):
    '''Apply Conv2D -> BatchNormalization -> ReLU to a channels-first tensor.

    Args:
        x (tf.Tensor): Input tensor.
        layer_name (str): Prefix for the layer names; an empty string gives
            un-prefixed names such as ``conv1`` / ``bn1``.
        filters (int): Number of filters of the convolution.
        kernel_size (Tuple[int, int]): Kernel size of the convolution.
        strides (Tuple[int, int]): Strides of the convolution.
        i (str): Suffix appended to the layer names, eg. '2' for conv2.
        epsilon (float): Epsilon passed to the batch normalization layer.

    Returns:
        tf.Tensor: Output of the ReLU activation.
    '''
    # An empty layer_name means the names carry no prefix at all.
    prefix = f'{layer_name}_' if layer_name else ''

    convolved = Conv2D(
        filters,
        kernel_size,
        strides=strides,
        data_format='channels_first',
        name=f'{prefix}conv{i}',
    )(x)
    # axis=1 is the channel axis in the channels-first layout.
    normalized = BatchNormalization(axis=1, epsilon=epsilon, name=f'{prefix}bn{i}')(convolved)
    return Activation('relu')(normalized)

def inception_block_4a(X):
    '''Inception block 4a: 3x3, 5x5, max-pool and 1x1 branches joined on axis 1.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The four branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (96) -> pad 1 -> 3x3 conv (128)
    branch_3x3 = conv2d_bn(X, 'inception_4a_3x3', 96, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(branch_3x3, 'inception_4a_3x3', 128, kernel_size=(3, 3), i='2')

    # Branch 2: 1x1 conv (16) -> pad 2 -> 5x5 conv (32)
    branch_5x5 = conv2d_bn(X, 'inception_4a_5x5', 16, i='1')
    branch_5x5 = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_5x5)
    branch_5x5 = conv2d_bn(branch_5x5, 'inception_4a_5x5', 32, kernel_size=(5, 5), i='2')

    # Branch 3: 3x3 max pool (stride 2) -> 1x1 conv (32) -> asymmetric padding
    branch_pool = MaxPooling2D(pool_size=3, strides=2, data_format=layout)(X)
    branch_pool = conv2d_bn(branch_pool, 'inception_4a_pool', 32)
    branch_pool = ZeroPadding2D(padding=((3, 4), (3, 4)), data_format=layout)(branch_pool)

    # Branch 4: plain 1x1 conv (64)
    branch_1x1 = conv2d_bn(X, 'inception_4a_1x1', 64)

    return concatenate([branch_3x3, branch_5x5, branch_pool, branch_1x1], axis=1)


def inception_block_4b(X):
    '''Inception block 4b: 3x3, 5x5, average-pool and 1x1 branches joined on axis 1.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The four branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (96) -> pad 1 -> 3x3 conv (128)
    branch_3x3 = conv2d_bn(X, 'inception_4b_3x3', 96, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(branch_3x3, 'inception_4b_3x3', 128, kernel_size=(3, 3), i='2')

    # Branch 2: 1x1 conv (32) -> pad 2 -> 5x5 conv (64)
    branch_5x5 = conv2d_bn(X, 'inception_4b_5x5', 32, i='1')
    branch_5x5 = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_5x5)
    branch_5x5 = conv2d_bn(branch_5x5, 'inception_4b_5x5', 64, kernel_size=(5, 5), i='2')

    # Branch 3: 3x3 average pool (stride 3) -> 1x1 conv (64) -> pad 4
    branch_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3), data_format=layout)(X)
    branch_pool = conv2d_bn(branch_pool, 'inception_4b_pool', 64)
    branch_pool = ZeroPadding2D(padding=(4, 4), data_format=layout)(branch_pool)

    # Branch 4: plain 1x1 conv (64)
    branch_1x1 = conv2d_bn(X, 'inception_4b_1x1', 64)

    return concatenate([branch_3x3, branch_5x5, branch_pool, branch_1x1], axis=1)


def inception_block_4c(X):
    '''Inception block 4c: strided 3x3 and 5x5 branches plus a max-pool branch.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The three branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (128) -> pad 1 -> 3x3 conv (256) with stride 2
    branch_3x3 = conv2d_bn(X, 'inception_4c_3x3', 128, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(
        branch_3x3, 'inception_4c_3x3', 256, kernel_size=(3, 3), strides=(2, 2), i='2'
    )

    # Branch 2: 1x1 conv (32) -> pad 2 -> 5x5 conv (64) with stride 2
    branch_5x5 = conv2d_bn(X, 'inception_4c_5x5', 32, i='1')
    branch_5x5 = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_5x5)
    branch_5x5 = conv2d_bn(
        branch_5x5, 'inception_4c_5x5', 64, kernel_size=(5, 5), strides=(2, 2), i='2'
    )

    # Branch 3: 3x3 max pool (stride 2) padded by one on the bottom/right only
    branch_pool = MaxPooling2D(pool_size=3, strides=2, data_format=layout)(X)
    branch_pool = ZeroPadding2D(padding=((0, 1), (0, 1)), data_format=layout)(branch_pool)

    return concatenate([branch_3x3, branch_5x5, branch_pool], axis=1)


def inception_block_5a(X):
    '''Inception block 5a: 3x3, 5x5, average-pool and 1x1 branches joined on axis 1.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The four branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (96) -> pad 1 -> 3x3 conv (192)
    branch_3x3 = conv2d_bn(X, 'inception_5a_3x3', 96, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(branch_3x3, 'inception_5a_3x3', 192, kernel_size=(3, 3), i='2')

    # Branch 2: 1x1 conv (32) -> pad 2 -> 5x5 conv (64)
    branch_5x5 = conv2d_bn(X, 'inception_5a_5x5', 32, i='1')
    branch_5x5 = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_5x5)
    branch_5x5 = conv2d_bn(branch_5x5, 'inception_5a_5x5', 64, kernel_size=(5, 5), i='2')

    # Branch 3: 3x3 average pool (stride 3) -> 1x1 conv (128) -> pad 2
    branch_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3), data_format=layout)(X)
    branch_pool = conv2d_bn(branch_pool, 'inception_5a_pool', 128)
    branch_pool = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_pool)

    # Branch 4: plain 1x1 conv (256)
    branch_1x1 = conv2d_bn(X, 'inception_5a_1x1', 256)

    return concatenate([branch_3x3, branch_5x5, branch_pool, branch_1x1], axis=1)
    

def inception_block_5b(X):
    '''Inception block 5b: strided 3x3 and 5x5 branches plus a max-pool branch.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The three branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (160) -> pad 1 -> 3x3 conv (256) with stride 2
    branch_3x3 = conv2d_bn(X, 'inception_5b_3x3', 160, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(
        branch_3x3, 'inception_5b_3x3', 256, kernel_size=(3, 3), strides=(2, 2), i='2'
    )

    # Branch 2: 1x1 conv (64) -> pad 2 -> 5x5 conv (128) with stride 2
    branch_5x5 = conv2d_bn(X, 'inception_5b_5x5', 64, i='1')
    branch_5x5 = ZeroPadding2D(padding=(2, 2), data_format=layout)(branch_5x5)
    branch_5x5 = conv2d_bn(
        branch_5x5, 'inception_5b_5x5', 128, kernel_size=(5, 5), strides=(2, 2), i='2'
    )

    # Branch 3: 3x3 max pool (stride 2) padded by one on the bottom/right only
    branch_pool = MaxPooling2D(pool_size=3, strides=2, data_format=layout)(X)
    branch_pool = ZeroPadding2D(padding=((0, 1), (0, 1)), data_format=layout)(branch_pool)

    return concatenate([branch_3x3, branch_5x5, branch_pool], axis=1)


def inception_block_6a(X):
    '''Inception block 6a: 3x3, average-pool and 1x1 branches joined on axis 1.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The three branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (96) -> pad 1 -> 3x3 conv (384)
    branch_3x3 = conv2d_bn(X, 'inception_6a_3x3', 96, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(branch_3x3, 'inception_6a_3x3', 384, kernel_size=(3, 3), i='2')

    # Branch 2: 3x3 average pool (stride 3) -> 1x1 conv (96) -> pad 1
    branch_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3), data_format=layout)(X)
    branch_pool = conv2d_bn(branch_pool, 'inception_6a_pool', 96)
    branch_pool = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_pool)

    # Branch 3: plain 1x1 conv (256)
    branch_1x1 = conv2d_bn(X, 'inception_6a_1x1', 256)

    return concatenate([branch_3x3, branch_pool, branch_1x1], axis=1)


def inception_block_6b(X):
    '''Inception block 6b: 3x3, max-pool and 1x1 branches joined on axis 1.

    Args:
        X (tf.Tensor): Input tensor; all layers here are built channels-first.

    Returns:
        tf.Tensor: The three branch outputs concatenated along axis 1.
    '''
    layout = 'channels_first'

    # Branch 1: 1x1 conv (96) -> pad 1 -> 3x3 conv (384)
    branch_3x3 = conv2d_bn(X, 'inception_6b_3x3', 96, i='1')
    branch_3x3 = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_3x3)
    branch_3x3 = conv2d_bn(branch_3x3, 'inception_6b_3x3', 384, kernel_size=(3, 3), i='2')

    # Branch 2: 3x3 max pool (stride 2) -> 1x1 conv (96) -> pad 1
    branch_pool = MaxPooling2D(pool_size=3, strides=2, data_format=layout)(X)
    branch_pool = conv2d_bn(branch_pool, 'inception_6b_pool', 96)
    branch_pool = ZeroPadding2D(padding=(1, 1), data_format=layout)(branch_pool)

    # Branch 3: plain 1x1 conv (256)
    branch_1x1 = conv2d_bn(X, 'inception_6b_1x1', 256)

    return concatenate([branch_3x3, branch_pool, branch_1x1], axis=1)



def facenet_model(input_shape, weights_path='/content/facenet_weights.h5'):
    '''Implementation of the Inception model used for FaceNet.

    Every padding/pooling layer is given ``data_format='channels_first'``
    explicitly, matching the inception blocks, so the stem does not depend on
    the globally configured backend image layout.

    Arguments:
    input_shape (Tuple[int]): Shape of a single input image, channels first,
        e.g. ``(3, 96, 96)``.
    weights_path (str or None): Path of the weight file passed to
        ``model.load_weights``. Pass ``None`` to skip loading weights.

    Returns:
    model (keras.models.Model): FaceNet model whose output is a 128-value
        vector per image, L2-normalized along axis 1.
    '''
    layout = 'channels_first'

    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3), data_format=layout)(X_input)

    # First Block
    X = conv2d_bn(X, '', 64, kernel_size=(7, 7), strides=(2, 2), i='1', epsilon=0.001)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1), data_format=layout)(X)
    X = MaxPooling2D((3, 3), strides=2, data_format=layout)(X)

    # Second Block
    X = conv2d_bn(X, '', 64, i='2')

    # Zero-Padding (no pooling between the second and third block)
    X = ZeroPadding2D((1, 1), data_format=layout)(X)

    # Third Block
    X = conv2d_bn(X, '', 192, kernel_size=(3, 3), i='3')

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1), data_format=layout)(X)
    X = MaxPooling2D(pool_size=3, strides=2, data_format=layout)(X)

    # Fourth Block (Inception)
    X = inception_block_4a(X)
    X = inception_block_4b(X)
    X = inception_block_4c(X)

    # Fifth Block (Inception)
    X = inception_block_5a(X)
    X = inception_block_5b(X)

    # Sixth Block (Inception)
    X = inception_block_6a(X)
    X = inception_block_6b(X)

    # Top layer
    X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1), data_format=layout)(X)
    X = Flatten()(X)
    X = Dense(128, name='dense')(X)

    # L2 normalization of each embedding vector
    X = Lambda(lambda x: tfback.l2_normalize(x, axis=1))(X)

    # Create model instance
    model = Model(inputs=X_input, outputs=X, name='FaceNetModel')

    # Load the pretrained weights unless the caller explicitly opted out.
    if weights_path is not None:
        model.load_weights(weights_path)

    return model

def img_to_encoding(image, model):
    '''Compute the model's embedding for a single image.

    Args:
        image (numpy.ndarray): Image with the channel axis last; presumably
            BGR as read by cv2.imread — confirm against the caller.
        model: Object exposing ``predict_on_batch``.

    Returns:
        The result of ``model.predict_on_batch`` for a batch holding this one image.
    '''
    # The model input is 96x96.
    small = cv2.resize(image, (96, 96))
    # Reverse the order of the channel axis.
    flipped = small[..., ::-1]
    # Move channels to the front, scale by 1/255 and round to 12 decimals.
    channels_first = np.transpose(flipped, (2, 0, 1))
    scaled = np.around(channels_first / 255.0, decimals=12)
    # Wrap the single image into a batch of size one.
    batch = np.array([scaled])
    return model.predict_on_batch(batch)

def cosine_sim(embedding_one, embedding_two):
    '''Return the cosine similarity between the first rows of two embedding arrays.

    Args:
        embedding_one: 2D array-like of embeddings.
        embedding_two: 2D array-like of embeddings.

    Returns:
        Entry [0][0] of the pairwise cosine similarity matrix.
    '''
    pairwise = cosine_similarity(embedding_one, embedding_two)
    first_row = pairwise[0]
    return first_row[0]

if __name__ == '__main__':
    # Demo: embed three face images and print their pairwise cosine similarity.

    # facenet_model() loads the pretrained weights itself, so they are not
    # loaded a second time here.
    model = facenet_model(input_shape=(3, 96, 96))

    image_paths = [
        '/content/note1.jpg',
        '/content/face.jpg',
        '/content/tom1.jpg',
    ]

    embeddings = []
    for image_path in image_paths:
        img = cv2.imread(image_path, 1)
        # cv2.imread returns None instead of raising when the file is unreadable.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')

        cropped = get_face(img)
        embedding = img_to_encoding(cropped, model)
        print(embedding)
        embeddings.append(embedding)

    # Keep the individual names available for later notebook cells.
    embedding_one, embedding_two, embedding_three = embeddings

    print("1 and 2", cosine_sim(embedding_one, embedding_two))
    print("1 and 3", cosine_sim(embedding_one, embedding_three))
    print("2 and 3", cosine_sim(embedding_two, embedding_three))


[[ 0.10057095  0.19410542  0.06577026 -0.05318104  0.01362109  0.24428895
   0.06092777  0.02154713 -0.03541315 -0.04161831  0.02754257 -0.04916706
   0.02150827 -0.08118604  0.13124986 -0.15816164 -0.04259171 -0.02125218
  -0.11879972  0.03296365 -0.02994638  0.1340507  -0.01532782  0.13987401
  -0.03703727 -0.1623131  -0.06942742 -0.05056001  0.063347    0.00541252
   0.00785884 -0.07939906 -0.11207245  0.07171599  0.05154204  0.02564145
   0.07106373  0.03157922 -0.07223582  0.06759218  0.08306014 -0.04443373
  -0.13995853 -0.04314084 -0.13422687  0.08123747  0.06768766  0.05464636
  -0.19848157  0.11653883 -0.03190811  0.03708814 -0.00478288  0.01850133
  -0.01469674  0.00670513 -0.07694617  0.0977857  -0.04518026 -0.11299125
  -0.11229078  0.12209465  0.10994184 -0.19406065 -0.00905437  0.0448334
   0.16869573  0.01670841 -0.13994175  0.04406787  0.05560887  0.03400984
   0.01929025  0.04903843  0.09671772  0.02301131 -0.11101002 -0.03000821
   0.09394254  0.04342777  0.01854156  