In this notebook different quantisation methods and distance metrics for Facial Recognition will be compared both on accuracy and execution time. 

The Quantisation methods include:
- Scalar Quantisation
- TensorFlow Quantisation

The distance metrics include:
- Cosine Similarity
- Euclidean Distance

Below are the necassary import to run the code.

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # suppress tensorflow warnings https://stackoverflow.com/a/40871012
from deepface import DeepFace
import subprocess
import numpy as np
from decimal import Decimal # for proper rounding
import random
import time
import pandas as pd
from datetime import datetime
import struct
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import statistics
import accuracy as ac
import pickle
import quantisations as qt
import basics as bs


# CONSTANTS
EXECUTABLE_PATH = "ABY/build/bin"
INPUT_FILE_NAME = "input_vecs.txt"
EXECUTABLE_NAME_SCENARIO = 'cos_dist_copy'
CMD_SCENARIO = f"./{EXECUTABLE_NAME_SCENARIO} -r 1 -f {INPUT_FILE_NAME} & (./{EXECUTABLE_NAME_SCENARIO} -r 0 -f {INPUT_FILE_NAME} 2>&1 > /dev/null)"

# random number generator
rng = np.random.default_rng()

Here we test if quantisation works

In [2]:
embedding1 = bs.get_embedding("lfw/George_W_Bush/George_W_Bush_0001.jpg")
embedding2 = bs.get_embedding("lfw/George_W_Bush/George_W_Bush_0002.jpg")
bs.get_cos_dist_numpy(embedding1, embedding2)
# print(embedding1)
embedding1_quant=qt.scalar_quantisation_percentile(embedding1)
embedding2_quant=qt.quantize_tensor(embedding2)
print("size before scalar quantisation:" ,sys.getsizeof(embedding1), ", size after scalar quantisation:",sys.getsizeof(embedding1_quant)) 
print("size before tensor quantisation:", sys.getsizeof(embedding2), ", size after tensor quantisation:",sys.getsizeof(embedding2_quant)) 
print("the type of the elements in the scalar quantisation is:", type(embedding1_quant[0]), "in the non quantised embedding it was:",type(embedding1[0]))
print("the type of the elements in the tensor quantisation is:", type(embedding2_quant[0]), "in the non quantised embedding it was:",type(embedding2[0]))

size before scalar quantisation: 1080 , size after scalar quantisation: 240
size before tensor quantisation: 1080 , size after tensor quantisation: 240
the type of the elements in the scalar quantisation is: <class 'numpy.int8'> in the non quantised embedding it was: <class 'float'>
the type of the elements in the tensor quantisation is: <class 'numpy.int8'> in the non quantised embedding it was: <class 'float'>


Below are two functions to compare Facenet and Sface accuracy. One for Euclidean Distance and one for Cosine Similarity. The code to create a visual representation for this comparison is also included.

In [None]:
### GENERATING THE GLOBAL PAIRS. 
### we only need to run this once and then we can have the file and use pairs as the list of embeddings 
### uncomment to use for first them, then use the next cell!
### since we are using the github you already have the file so just use the next cell!!


# Generate pairs globally
def generate_pairs(m):
    pairs = []
    for _ in range(m):
        print(_)
        n = random.choice([True, False])
        a, b, imga, imgb = bs.get_two_random_embeddings_facenet(same_person=n)
        pairs.append((a, b, imga, imgb))
    return pairs

m = 1000
pairs = generate_pairs(m)


# Save pairs to a file
with open('pairs.pkl', 'wb') as file:
    pickle.dump(pairs, file)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130


In [None]:
with open('pairs.pkl', 'rb') as file:
    pairs = pickle.load(file)

In [None]:
# Use Euclidean distance
correct_f_euc, incorrect_f_euc, correct_s_euc, incorrect_s_euc = ac.compare_accuracies_euc(pairs)





In [None]:
# Use Euclidean distance
correct_f_cos, incorrect_f_cos, correct_s_cos, incorrect_s_cos = ac.compare_accuracies_cos(pairs, m)





In [None]:
print("facenet euclidean - correct :", correct_f_euc, "incorrect:", incorrect_f_euc)
print("sface euclidean - correct:", correct_s_euc, "incorrect:", incorrect_s_euc)

print("facenet cosine - correct :", correct_f_cos, "incorrect:", incorrect_f_euc)
print("sface cosine - correct:", correct_s_cos, "incorrect:", incorrect_s_cos)

In [None]:
# Create visualisation for ED

# Data
methods = ['No quantisation', 'Scalar Quantisation (8 bits)', 'Tensorflow Quantisation (8 bits)']
correct_sface_euc = correct_s_euc
incorrect_sface_cos = incorrect_s_euc
correct_sface_cos = correct_s_cos
incorrect_sface_cos = incorrect_s_cos
correct_facenet_euc = correct_f_euc
incorrect_facenet_euc = incorrect_f_euc
correct_facenet_cos = correct_f_cos
incorrect_facenet_cos = incorrect_f_cos

# Number of methods
n_methods = len(methods)

# Position of bars on X axis
ind = np.arange(n_methods)

# Width of a bar 
width = 0.2       

# Create the plot
fig, ax = plt.subplots(figsize=(10, 6))

# Bar plots for SFace
bar1 = ax.bar(ind - width * 1.5, correct_sface_euc, width, label='Correct Euclidean (SFace)', color='skyblue')
bar2 = ax.bar(ind - width * 0.5, incorrect_sface_euc, width, label='Incorrect Euclidean (SFace)', color='palevioletred')

bar3 = ax.bar(ind - width * 1.5, correct_sface_cos, width, label='Correct Cosine(SFace)', color='orange')
bar4 = ax.bar(ind - width * 0.5, incorrect_sface_cos, width, label='Incorrect Cosine (SFace)', color='lightpurple')


# Bar plots for Facenet
bar5 = ax.bar(ind + width * 0.5, correct_facenet_euc, width, label='Correct Euclidean (Facenet)', color='lightgreen')
bar6 = ax.bar(ind + width * 1.5, incorrect_facenet_euc, width, label='Incorrect Euclidean (Facenet)', color='blueviolet')

bar7 = ax.bar(ind + width * 0.5, correct_facenet_cos, width, label='Correct Cosine (Facenet)', color='lightgreen')
bar8 = ax.bar(ind + width * 1.5, incorrect_facenet_cos, width, label='Incorrect Consine(Facenet)', color='blueviolet')

# Adding text labels for all bars
for bars in [bar1, bar2, bar3, bar4, bar5, bar6, bar7, bar8]:
    for i, bar in enumerate(bars):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, str(int(bar.get_height())), ha='center', va='bottom')

# Labels, title and axes ticks
ax.set_xlabel('Quantisation Methods')
ax.set_ylabel('Values')
ax.set_title('Accuracy Comparison of Different Quantisation Methods using SFace and Facenet (Euclidean Distance and Cosine Similarity)')
ax.set_xticks(ind)
ax.set_xticklabels(methods, rotation=45, ha='right')
ax.legend(loc='upper right', bbox_to_anchor=(1.2, 1))

# Display the plot
plt.show()


below will be the functions to compare the execution time of (Facenet, SFace) x (Euclidean, Cosine) X (no quantisation, Tensorflow, scalar)

Below is the code to draw to figures visualising the accuracy, one for facenet and one for sface

In [None]:
# Define your experiments
experiments = [
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": None, "distance_func": get_cos_dist_numpy, "quantize": False},
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": None, "distance_func": euclidean_distance, "quantize": False},
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": scalar_quantisation_percentile, "distance_func": get_cos_dist_numpy, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": scalar_quantisation_percentile, "distance_func": euclidean_distance, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": quantize_tensor, "distance_func": get_cos_dist_numpy, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding_facenet, "quantize_func": quantize_tensor, "distance_func": euclidean_distance, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": None, "distance_func": get_cos_dist_numpy, "quantize": False},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": None, "distance_func": euclidean_distance, "quantize": False},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": scalar_quantisation_percentile, "distance_func": get_cos_dist_numpy, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": scalar_quantisation_percentile, "distance_func": euclidean_distance, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": quantize_tensor, "distance_func": get_cos_dist_numpy, "quantize": True},
    {"n": 1000, "embedding_func": get_embedding, "quantize_func": quantize_tensor, "distance_func": euclidean_distance, "quantize": True},
]

In [None]:
def run_experiment(n, get_embedding_func, quantize_func, distance_func, quantize=False):
    execution_times = []

    for _ in range(n):
        print(_)
        same_person = random.choice([True, False])
        a, b, imga, imgb = get_two_random_embeddings(same_person=same_person)
        start_time = time.time()
        a = get_embedding_func(imga)
        b = get_embedding_func(imgb)
        
        if quantize:
            a = quantize_func(a)
            b = quantize_func(b)
        
        if distance_func == get_cos_dist_numpy:
            a = a / np.linalg.norm(a)
            b = b / np.linalg.norm(b)
        
        distance_func(a, b)
        end_time = time.time()

        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

In [None]:
# Run all experiments
results = {}
for i, experiment in enumerate(experiments):
    key = f"experiment_{i+1}"
    results[key] = run_experiment(**experiment)
    print(f"{key} completed")

In [None]:
# Define a dictionary with experiment names for readability
experiment_names = {
    "experiment_1": "avg_execution_times_no_quantisation_facenet_cos",
    "experiment_2": "avg_execution_times_no_quantisation_facenet_ed",
    "experiment_3": "avg_execution_times_scalar_quantisation_facenet_cos",
    "experiment_4": "avg_execution_times_scalar_quantisation_facenet_ed",
    "experiment_5": "avg_execution_times_tensorflow_quantisation_facenet_cos",
    "experiment_6": "avg_execution_times_tensorflow_quantisation_facenet_ed",
    "experiment_7": "avg_execution_times_no_quantisation_sface_cos",
    "experiment_8": "avg_execution_times_no_quantisation_sface_ed",
    "experiment_9": "avg_execution_times_scalar_quantisation_sface_cos",
    "experiment_10": "avg_execution_times_scalar_quantisation_sface_ed",
    "experiment_11": "avg_execution_times_tensorflow_quantisation_sface_cos",
    "experiment_12": "avg_execution_times_tensorflow_quantisation_sface_ed",
}

In [None]:
# Calculate and print the average execution times
for key, name in experiment_names.items():
    avg_time = statistics.mean(results[key])
    print(f"{name} = {avg_time}")