In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Input, Model
from keras.layers import Activation, AveragePooling2D, BatchNormalization, Concatenate, Conv2D, Dense, Dropout, Flatten
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Input, Lambda, MaxPooling2D
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications import Xception


import numpy as np
import pandas as pd
!pip install mtcnn
from mtcnn.mtcnn import MTCNN
import cv2
import os
import h5py

import plotly.graph_objects as go
import plotly



In [None]:
'''Load custom face data into a pandas dataframe for testing purposes.
These are the faces the model should hopefully be able to identify.'''

# Root folder that is walked recursively for face images.
custom_data = r'/content/drive/My Drive/Data/FaceDataset/custom_face_data'

# Lower-case file extensions that count as face images.
image_extensions = ('.jpg', '.jpeg')

imgpaths = []
for dirpath, _dirnames, filenames in os.walk(custom_data):
    for filename in filenames:
        # Test the actual extension, case-insensitively. A plain substring test
        # ('jpg' in filename) would also accept names such as 'jpg_notes.txt'.
        if filename.lower().endswith(image_extensions):
            imgpaths.append(os.path.join(dirpath, filename))

# Kept from the original as a safety net against macOS metadata files.
imgpaths = [i for i in imgpaths if "DS_Store" not in i]

df = pd.DataFrame(imgpaths, columns = ['ImgPath'])
# The label of an image is the name of the folder that directly contains it.
df['Name'] = df.ImgPath.apply(lambda x: os.path.basename(os.path.dirname(x)))

In [None]:
# Build the embedding model: frozen Xception backbone followed by a small dense head
input_shape = (96, 96, 3)
embedding_size = 128

# ImageNet-pretrained Xception without its classification top; its weights are frozen
xception = Xception(include_top=False, weights="imagenet", input_shape=input_shape)
xception.trainable = False

inputs = Input(shape=input_shape)

# Stages are constructed and applied in exactly this order
embedding_stages = [
    # Xception preprocessing: maps a pixel value x to x/127.5 - 1
    Lambda(lambda x: (x/127.5)-1),
    xception,
    GlobalMaxPooling2D(),
    Dense(embedding_size*4, activation='relu'),
    Dense(embedding_size),
    # L2-normalise every embedding vector (axis 1 is the embedding axis)
    Lambda(lambda x: tf.math.l2_normalize(x, axis=1)),
]

layer = inputs
for stage in embedding_stages:
    layer = stage(layer)

model = Model(inputs, layer)
model.summary()


Model: "functional_33"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_22 (InputLayer)        [(None, 96, 96, 3)]       0         
_________________________________________________________________
lambda_8 (Lambda)            (None, 96, 96, 3)         0         
_________________________________________________________________
xception (Functional)        (None, 3, 3, 2048)        20861480  
_________________________________________________________________
global_max_pooling2d_4 (Glob (None, 2048)              0         
_________________________________________________________________
dense_36 (Dense)             (None, 512)               1049088   
_________________________________________________________________
dense_37 (Dense)             (None, 128)               65664     
_________________________________________________________________
lambda_9 (Lambda)            (None, 128)             

In [None]:
''' Functions for testing embeddings.
The metric used to test the model is whether the distance (Euclidean distance or cosine-based angular distance)
between the Anchor-Positive pair is smaller than the distance between the Anchor-Negative pair.
Because the last layer of the embedding model applies L2 normalization, every embedding has unit length, so the
Euclidean distance and the cosine similarity rank the pairs identically and produce the same results.'''

# NOTE: no function in this cell generates triplets of anchor-positive, anchor-negative pairs from X and Y numpy arrays of equal length; triplets are built from the dataframe by get_all_triplets below.

def img_to_emb(img_array, embedding_model):
    '''Runs a single image through the embedding model.
        # Args
          img_array: image array; this notebook passes the (96, 96, 3) faces returned by extract_face
          embedding_model: object exposing a predict method (the trained embedding model)
        # Returns
          The output of embedding_model.predict for a batch holding only this image
          (shape (1, 128) for the model defined in this notebook)
    '''
    # predict expects a batch, so add a leading axis of length 1
    single_image_batch = np.expand_dims(img_array, axis=0)
    embedding = embedding_model.predict(single_image_batch)
    return embedding


def cosine_similarity(v1,v2):
    '''Angular distance derived from the cosine similarity of two vectors.
    Despite its name, this function returns the ANGLE between v1 and v2 in radians
    (0 for identical directions, pi for opposite ones), so smaller means more similar.
        # Args
          v1, v2: 1-D vectors, or row vectors of shape (1, d) as produced by img_to_emb
        # Returns
          arccos of the cosine similarity: a scalar for 1-D inputs, a (1, 1) array for row vectors.
          The result is NaN if either vector has zero norm.
    '''
    v1_norm = np.linalg.norm(v1)
    v2_norm = np.linalg.norm(v2)
    cos_alpha = v1@v2.T/(v1_norm*v2_norm)
    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # (e.g. for two identical vectors), where arccos returns NaN. Clip first.
    cos_alpha = np.clip(cos_alpha, -1.0, 1.0)
    angle_radians = np.arccos(cos_alpha)
    return angle_radians


def evaluate_embeddings(anchor_emb, pos_emb, neg_emb, similarity = "Euclidean", verbose=False):
    '''
    Checks one triplet: is the anchor closer to the positive than to the negative?
        # Args
          anchor_emb, pos_emb, neg_emb: embedding arrays of the anchor, positive and negative image
          similarity: "Euclidean" (default) uses the norm of the difference,
                      "Cosine" uses the cosine_similarity helper
          verbose: when exactly True, prints both distances (for debugging)

        # Returns
          True if the anchor-positive distance is strictly smaller than the
          anchor-negative distance, otherwise False

        # Raises
          ValueError('Invalid Metric') for any other value of similarity
    '''

    def _euclidean(first, second):
        return np.linalg.norm(first-second)

    # Pick the distance function once, then apply it to both pairs
    if similarity == "Euclidean":
        distance = _euclidean
    elif similarity == "Cosine":
        distance = cosine_similarity
    else:
        raise ValueError('Invalid Metric')

    pos_dist = distance(anchor_emb, pos_emb)
    neg_dist = distance(anchor_emb, neg_emb)

    if verbose is True:
        print(pos_dist, neg_dist)

    return bool(pos_dist < neg_dist)


# One shared MTCNN face detector, created once and reused by every extract_face call
detector = MTCNN()
def extract_face(imgpath):
    '''Reads an image from disk and returns the first detected face, resized to (96, 96, 3).
    The image is read with cv2 and converted from BGR to RGB, MTCNN locates the faces,
    and the image is cropped to the bounding box of the first detection.
        # Args
          imgpath: path to the image file
        # Returns
          RGB image array of shape (96, 96, 3) containing the cropped face
        # Raises
          FileNotFoundError: the file is missing or cannot be decoded by cv2
          ValueError: MTCNN did not detect any face in the image
    '''
    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f'Could not read image: {imgpath}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    detections = detector.detect_faces(img)
    if not detections:
        raise ValueError(f'No face detected in image: {imgpath}')

    # Only the first detection is used, even if several faces were found
    x1, y1, w, h = detections[0]['box']
    # NOTE(review): abs() mirrors a negative corner instead of clamping it to 0;
    # presumably a guard against boxes that start slightly outside the image.
    # Left unchanged so crops stay comparable with earlier evaluations; confirm.
    x1, y1 = abs(x1), abs(y1)
    x2 = abs(x1+w)
    y2 = abs(y1+h)
    face = img[y1:y2, x1:x2]
    face = cv2.resize(face, (96,96))
    return face


def anchor_pos_pairs(identity):
    '''Finds all possible anchor-positive pairs for one identity in the global dataframe df.
        # Args:
          identity: string, one of the values in the Name column of df
        # Returns:
          list of [anchor_index, positive_index] lists, one per unordered pair of rows
          belonging to that identity (each pair appears once, anchor listed before positive
          in dataframe order); empty if the identity has fewer than two rows'''

    index = df[df.Name==identity].index
    num = len(index)
    # Enumerate the strict upper triangle (i < j) directly, in row-major order
    return [[index[i], index[j]] for i in range(num) for j in range(i + 1, num)]

def get_all_triplets(identity):
    '''Builds every [anchor, positive, negative] index triplet for one identity.
        # Args:
          identity: string, one of the values in the Name column of the global dataframe df
        # Returns:
          list of [anchor_index, positive_index, negative_index] lists: each pair from
          anchor_pos_pairs(identity) combined with each row index of a different identity'''

    positives = anchor_pos_pairs(identity)
    negatives = df[df.Name!=identity].index
    return [[*pair, negative] for pair in positives for negative in negatives]


def most_common(array):
    '''Helper function that finds the most common item in a numpy array (or any iterable).
    Ties are broken deterministically: among equally frequent items, the one that
    occurs first in the input wins.
        # Args:
          array: iterable of hashable items
        # Returns:
          the most frequent item
        # Raises:
          ValueError if the input is empty'''
    # Imported locally so the helper does not depend on the notebook's import cell
    from collections import Counter

    # Counter keeps first-seen order and max() returns the first maximal key, so the
    # result no longer depends on set iteration order (which varies for strings).
    counts = Counter(list(array))
    return max(counts, key=counts.get)

def knn(frame_enc, encodings, names, neighbors=5):
    '''K-nearest-neighbours vote over face encodings, using Euclidean distance.
    # Args:
        frame_enc: face encoding to identify (an embedding, not a raw image)
        encodings: sequence of reference encodings that frame_enc is compared against
        names: names corresponding, position by position, to the reference encodings
        neighbors: how many of the closest reference encodings take part in the vote
    # Returns:
        The name that most_common selects among the `neighbors` closest encodings'''

    # Rank the reference encodings by their distance to the query, closest first
    closest = np.argsort(
        np.array([np.linalg.norm(frame_enc - candidate) for candidate in encodings])
    )[:neighbors]
    voters = np.array(names)[closest]
    return most_common(voters)

In [None]:
def model_eval(weights, neighbors=5, exclude_self=False):
    '''
    Evaluates a set of trained weights on the custom dataset.
    Side effects: loads the weights into the global `model` and (re)writes the
    'Embedding' column of the global dataframe `df`.
    # Args
        weights: path to a keras weights file in hdf5 format
        neighbors: number of neighbours used by the KNN vote (default 5)
        exclude_self: if True, each image is removed from the gallery before it is
            classified (leave-one-out). The default False keeps the historical
            behaviour, where every query also finds itself at distance 0, so results
            stay comparable with previously recorded evaluations.
    # Returns
        Accuracy with Euclidean Distance on custom dataset
        Accuracy with KNN on top of Euclidean Distance on custom dataset
        Weighted accuracy with KNN-EUC at 20-80 weighting (0.2 * KNN + 0.8 * Euclidean)
    # Raises
        ValueError if the dataset yields no triplets (e.g. it is empty or no identity
        has two images), because no accuracy can be computed then
    '''
    model.load_weights(weights)

    # Generate Embeddings for each image in custom dataset using the trained embedding model
    df['Embedding'] = df.ImgPath.apply(lambda x: img_to_emb(extract_face(x), model))

    # With the helper functions implemented above, find all possible triplets within custom dataset
    all_triplets = [
        triplet
        for name in set(df.Name.tolist())
        for triplet in get_all_triplets(name)
    ]
    if not all_triplets:
        raise ValueError('No triplets could be built from the dataset; cannot evaluate weights')

    # Accuracy with Euclidean distance: share of triplets whose positive is nearer than the negative
    embeddings = df.Embedding
    log_euc = [
        evaluate_embeddings(embeddings.loc[a], embeddings.loc[p], embeddings.loc[n])
        for a, p, n in all_triplets
    ]

    correct = log_euc.count(True)
    total = len(log_euc)
    euc_acc = correct/total*100
    print(f'Correct : {correct} / {total}')
    print(f'Errors : {total - correct}')
    print(f'ACCURACY with Euclidean Distance - {euc_acc}%')

    # Accuracy with KNN
    names = df.Name.values
    embs = df.Embedding.values

    nearest = []
    for position, query in enumerate(embs):
        if exclude_self:
            # Leave-one-out: drop the query itself from the gallery
            keep = np.arange(len(embs)) != position
            nearest.append(knn(query, embs[keep], names[keep], neighbors))
        else:
            nearest.append(knn(query, embs, names, neighbors))

    total = len(names)
    correct = sum(1 for x, y in zip(names, nearest) if x == y)
    knn_acc = correct/total*100
    print(f'Correct : {correct} / {total}')
    print(f'Errors : {total - correct}')
    print(f'ACCURACY with KNN{neighbors} - {knn_acc}%')

    weighted_acc = 0.2*knn_acc + 0.8*euc_acc
    print('\n')
    print(f'WEIGHTED ACCURACY: {weighted_acc}')
    return euc_acc, knn_acc, weighted_acc

In [None]:
# Table of previously evaluated weight files and their scores
eval_csv_path = r'/content/drive/My Drive/Data/FaceDataset/weight_eval.csv'
df_eval = pd.read_csv(eval_csv_path)
df_eval

Unnamed: 0,Weights,Euclidean,KNN,Weighted_Acc,Weighted,Temp
0,global_max_pooling_FC_4_1_xception.1.4598.hdf5,81.901659,85.0,82.521327,,
1,global_max_pooling_FC_4_1_xception.1.3681.hdf5,82.316351,82.5,82.353081,,
2,global_max_pooling_FC_4_1_xception.4.3588.hdf5,82.168246,82.5,82.234597,,
3,global_max_pooling_FC_4_1_xception.4.3625.hdf5,82.109005,82.5,82.187204,,
4,global_max_pooling_FC_4_1_xception.1.3571.hdf5,82.227488,80.0,81.781991,,
...,...,...,...,...,...,...
120,global_avg_pooling_xception.0.5317.hdf5,76.777251,62.5,73.921801,69.638626,
121,global_avg_pooling_xception.0.6665.hdf5,76.303318,62.5,73.542654,69.401659,
122,global_max_pooling_FC_1_xception.0.0312.hdf5,74.229858,70.0,73.383886,72.114929,
123,unfrozen_last_xception.0.2641.hdf5,71.919431,67.5,71.035545,69.709716,


In [None]:
# Evaluate every 'max'-pooling hdf5 weight file in the repo folder that is not yet listed in df_eval
repo = '/content/drive/My Drive/Data/FaceDataset'

# Build the lookup of already-evaluated files once, instead of rebuilding the
# list for every directory entry
already_evaluated = set(df_eval.Weights.tolist())
weights_list = [
    i for i in os.listdir(repo)
    if i.endswith('hdf5') and i not in already_evaluated and 'max' in i
]
print(weights_list)

# Parallel result lists, one entry per file in weights_list
euc_acc = []
knn_acc = []
weighted = []
for w in weights_list:
    print('\n')
    print(f'WEIGHTS: {w}')
    print('\n')
    euc, k_acc, wacc = model_eval(os.path.join(repo,w))
    euc_acc.append(euc)
    knn_acc.append(k_acc)
    weighted.append(wacc)

['global_max_pooling_FC_4_1_xception.1.4245.hdf5', 'global_max_pooling_FC_4_1_xception.1.4292.hdf5']


WEIGHTS: global_max_pooling_FC_4_1_xception.1.4245.hdf5


Correct : 2771 / 3376
Errors : 605
ACCURACY with Euclidean Distance - 82.07938388625593%
Correct : 30 / 40
Errors : 10
ACCURACY with KNN5 - 75.0%


WEIGHTED ACCURACY: 80.66350710900474


WEIGHTS: global_max_pooling_FC_4_1_xception.1.4292.hdf5


Correct : 2763 / 3376
Errors : 613
ACCURACY with Euclidean Distance - 81.84241706161137%
Correct : 30 / 40
Errors : 10
ACCURACY with KNN5 - 75.0%


WEIGHTED ACCURACY: 80.4739336492891


In [None]:
# Merge the freshly computed scores into the evaluation table and rank by weighted accuracy.
# NOTE: re-running this cell without re-running the evaluation loop adds the same rows again.
new_df_eval = pd.DataFrame({'Weights': weights_list, 'Euclidean': euc_acc, 'KNN': knn_acc, 'Weighted_Acc': weighted})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
df_eval = (
    pd.concat([df_eval, new_df_eval])
    .sort_values(by=['Weighted_Acc'], ascending=False)
    .reset_index(drop=True)
)
df_eval[:15]

Unnamed: 0,Weights,Euclidean,KNN,Weighted_Acc,Weighted,Temp
0,global_max_pooling_FC_4_1_xception.1.4598.hdf5,81.901659,85.0,82.521327,,
1,global_max_pooling_FC_4_1_xception.1.3681.hdf5,82.316351,82.5,82.353081,,
2,global_max_pooling_FC_4_1_xception.4.3588.hdf5,82.168246,82.5,82.234597,,
3,global_max_pooling_FC_4_1_xception.4.3625.hdf5,82.109005,82.5,82.187204,,
4,global_max_pooling_FC_4_1_xception.1.3571.hdf5,82.227488,80.0,81.781991,,
5,global_max_pooling_FC_4_1_xception.4.3533.hdf5,81.872038,80.0,81.49763,,
6,1M-2FC_xception_weights.0.6279.hdf5,82.494076,77.5,81.495261,79.997038,
7,Xception,81.131517,82.5,81.405213,81.815758,
8,global_max_pooling_FC_4_1_xception.1.3490.hdf5,82.375592,77.5,81.400474,,
9,global_max_pooling_FC_4_1_xception.4.3661.hdf5,81.635071,80.0,81.308057,,


In [None]:
# Persist the updated evaluation table (overwrites the file, without the index column)
eval_csv_path = r'/content/drive/My Drive/Data/FaceDataset/weight_eval.csv'
df_eval.to_csv(eval_csv_path, index=False)

In [None]:
# Alternative ranking, computed without modifying df_eval:
#   Temp        - the weight file name when it contains "max", otherwise None
#   Weighted    - plain average of the Euclidean and KNN accuracies
#   WeightedSum - plain average of Weighted and Weighted_Acc
df2 = (
    df_eval
    .assign(Temp=lambda frame: frame.Weights.apply(lambda x: x if "max" in str(x) else None))
    .drop(columns=['Weighted'])
    .assign(Weighted=lambda frame: frame.Euclidean * 0.5 + frame.KNN * 0.5)
    .sort_values(by=['Weighted'], ascending=False)
    .assign(WeightedSum=lambda frame: frame.Weighted * 0.5 + frame.Weighted_Acc * 0.5)
    .sort_values(by=['WeightedSum'], ascending=False)
)
# Display only: rows with any missing value are hidden, df2 itself keeps them
df2.dropna()


Unnamed: 0,Weights,Euclidean,KNN,Weighted_Acc,Temp,Weighted,WeightedSum
0,global_max_pooling_FC_4_1_xception.1.4598.hdf5,81.901659,85.0,82.521327,global_max_pooling_FC_4_1_xception.1.4598.hdf5,83.450829,82.986078
1,global_max_pooling_FC_4_1_xception.1.3681.hdf5,82.316351,82.5,82.353081,global_max_pooling_FC_4_1_xception.1.3681.hdf5,82.408175,82.380628
2,global_max_pooling_FC_4_1_xception.4.3588.hdf5,82.168246,82.5,82.234597,global_max_pooling_FC_4_1_xception.4.3588.hdf5,82.334123,82.284360
3,global_max_pooling_FC_4_1_xception.4.3625.hdf5,82.109005,82.5,82.187204,global_max_pooling_FC_4_1_xception.4.3625.hdf5,82.304502,82.245853
16,global_max_pooling_FC_4_1_xception.0.4527.hdf5,79.206161,87.5,80.864929,global_max_pooling_FC_4_1_xception.0.4527.hdf5,83.353081,82.109005
...,...,...,...,...,...,...,...
114,global_max_pooling_FC_1_xception.0.0518.hdf5,74.111374,77.5,74.789100,global_max_pooling_FC_1_xception.0.0518.hdf5,75.805687,75.297393
120,global_max_pooling_FC_4_1_xception.0.0040.hdf5,73.193128,77.5,74.054502,global_max_pooling_FC_4_1_xception.0.0040.hdf5,75.346564,74.700533
101,global_max_pooling_FC_4_1_xception.0.6716.hdf5,78.554502,67.5,76.343602,global_max_pooling_FC_4_1_xception.0.6716.hdf5,73.027251,74.685427
124,global_max_pooling_FC_1_xception.0.0312.hdf5,74.229858,70.0,73.383886,global_max_pooling_FC_1_xception.0.0312.hdf5,72.114929,72.749408


In [None]:
# Recompute the 50/50 average of both accuracies for every row and rank by it.
# Bracket assignment always writes a column; attribute assignment (df_eval.Weighted = ...)
# would only set a Python attribute if the column did not already exist.
df_eval['Weighted'] = df_eval.Euclidean * 0.5 + df_eval.KNN * 0.5
df_eval = df_eval.sort_values(by=['Weighted'], ascending=False)
df_eval

Unnamed: 0,Weights,Euclidean,KNN,Weighted_Acc,Weighted,Temp
0,global_max_pooling_FC_4_1_xception.1.4598.hdf5,81.901659,85.0,82.521327,83.450829,
16,global_max_pooling_FC_4_1_xception.0.4527.hdf5,79.206161,87.5,80.864929,83.353081,
29,global_max_pooling_FC_4_1_xception.0.5202.hdf5,78.524882,87.5,80.319905,83.012441,
1,global_max_pooling_FC_4_1_xception.1.3681.hdf5,82.316351,82.5,82.353081,82.408175,
21,global_max_pooling_FC_4_1_xception.0.4427.hdf5,79.680095,85.0,80.744076,82.340047,
...,...,...,...,...,...,...
125,unfrozen_last_xception.0.2641.hdf5,71.919431,67.5,71.035545,69.709716,
122,global_avg_pooling_xception.0.5317.hdf5,76.777251,62.5,73.921801,69.638626,
123,global_avg_pooling_xception.0.6665.hdf5,76.303318,62.5,73.542654,69.401659,
103,FINETUNE_1M-2FC_4_1_xception_weights.0.6694.hdf5,80.627962,57.5,76.002370,69.063981,FINETUNE_1M-2FC_4_1_xception_weights.0.6694.hdf5
