# Statistical Analysis of Binary Biometric Embeddings

In this notebook, we analyze the statistical properties of binary biometric embeddings extracted from face images. We explore the distribution of Hamming weights, compute Hamming distances for genuine (same-user) and impostor (different-user) pairs, and evaluate system performance by calculating the False Acceptance Rate (FAR) and False Rejection Rate (FRR). These analyses help us understand the discriminative power and robustness of the biometric system.


In [None]:
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
import plotly.io as pio
from err_calc_npy import load_float_embeddings, binarize_all, generar_pares, calcular_distancias, evaluar_umbral, graficar_interactivo, compute_err, encontrar_eer

In [2]:
# Load embeddings and binarize data
model = 512
float_dim = model
bits = 4  # 3 or 4

# Pick the binarization thresholds for the chosen bit depth. The branches are
# mutually exclusive and any other value fails loudly: otherwise t1/t2 would be
# undefined on a fresh kernel, or silently keep stale values from an earlier run.
if bits == 3:
    t1 = 0.1
    t2 = None  # no second threshold is set for the 3-bit configuration
elif bits == 4:
    t1 = 0.12
    t2 = 0.04
else:
    raise ValueError(f"Unsupported value for bits: {bits!r} (expected 3 or 4).")

# The dataset directory name is derived from the embedding dimension.
dataset_dir = f"./embeddings{model}_float_LFW"
data_f = load_float_embeddings(dataset_dir, float_dim)
data_b = binarize_all(data_f, bits, t1, t2) # Takes 4 secs approx

In [3]:
def epsilon(k, n, t):
    """
    Calculate the epsilon value for a code of dimension k and length n
    that corrects t errors.

    The value is ``k/n - 1 + H2(t/n)``, where
    ``H2(p) = -p*log2(p) - (1 - p)*log2(1 - p)`` is the binary entropy
    function, using the usual convention ``0*log2(0) = 0`` so that the
    end points ``t == 0`` and ``t == n`` give a finite result instead of NaN.

    Parameters:
    k (int): Code dimension.
    n (int): Code length.
    t (float): number of corrected errors; must satisfy 0 <= t <= n.

    Returns:
    float: The calculated epsilon value.

    Raises:
    ValueError: If k or n are out of range, or if t is not in [0, n].
    """
    if k > n or k < 0 or n <= 0:
        raise ValueError("Invalid values for k and n.")
    if t < 0 or t > n:
        # Outside this range log2 would receive a negative argument and the
        # result would silently be NaN.
        raise ValueError("Invalid value for t: expected 0 <= t <= n.")

    p = (t/n)

    # At the end points the entropy term is 0 by convention; evaluating the
    # formula directly would compute 0 * log2(0) = 0 * (-inf) = NaN.
    if p == 0 or p == 1:
        return np.float64(-1 + k/n)

    # Same expression (and evaluation order) as the closed formula above.
    return -1 + k/n - p*np.log2(p) - (1 - p)*np.log2(1 - p)
    

In [4]:
# Evaluate epsilon for k = 1217, n = 1634, t = 180; the value is shown as the cell output.
epsilon(1217, 1634, 180)

np.float64(0.24519376340980123)

In [None]:
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, clear_output

# Build the genuine / impostor pair sets from the binarized embeddings and
# compute one distance per pair. How pairs are sampled and which distance is
# used is decided inside err_calc_npy (presumably Hamming distance — confirm).
genuinos, impostores = generar_pares(data_b)
dist_g = calcular_distancias(genuinos)
dist_i = calcular_distancias(impostores)


# bin_length is the second dimension of the first array stored in data_b
# (presumably the number of bits per binarized embedding — confirm).
bin_length = next(iter(data_b.values())).shape[1]
# Candidate thresholds: every integer from 0 to bin_length inclusive.
thresholds = list(range(0, bin_length + 1))
# FAR / FRR evaluated at each threshold, then the equal-error-rate point.
fars, frrs = evaluar_umbral(dist_g, dist_i, thresholds)
eer_th, eer_val = encontrar_eer(thresholds, fars, frrs)


# One weight per entry of data_b: the sum of the FIRST row (arr[0]) only.
weights = [int(arr[0].sum()) for arr in data_b.values()]
weights_sample = weights  # No subsampling is applied: this is an alias for the full list

# Sliders for the code parameters read by update_histogram. With
# continuous_update=False the value changes only when the slider is released.
k_slider = widgets.IntSlider(value=1217, min=1200, max=2000, step=1, description='k', continuous_update=False)
n_slider = widgets.IntSlider(value=1634, min=1500, max=3000, step=1, description='n', continuous_update=False)
t_slider = widgets.IntSlider(value=600, min=0, max=1200, step = 1, description='t', continuous_update=False)
# Output widget that update_histogram renders the figure into.
out = widgets.Output()


print("Gereating figure...")


def update_histogram(change=None):
    """
    Rebuild the weights / FAR / FRR figure from the current slider values.

    Reads ``k_slider``, ``n_slider`` and ``t_slider``, evaluates ``epsilon``
    for them and renders a new Plotly figure into the ``out`` output widget,
    replacing its previous content. If ``epsilon`` rejects the current
    parameter combination, the reason is shown in ``out`` instead of a figure.

    Parameters:
    change: Unused. Accepted so the function can be called with no arguments
        or with a single argument (presumably as a widget observer callback).

    Returns:
    None
    """
    k = k_slider.value
    n = n_slider.value
    t = t_slider.value

    # The slider ranges overlap (k goes up to 2000 while n starts at 1500), so
    # k > n is reachable from the UI and makes epsilon() raise ValueError.
    # Report that in the output area instead of letting the callback fail.
    try:
        eps = epsilon(k, n, t)
    except ValueError as err:
        with out:
            clear_output(wait=True)
            print(f"Cannot update figure for k = {k}, n = {n}, t = {t}: {err}")
        return

    # Horizontal extent of the shaded band: from the smaller to the larger of
    # t and n - t.
    left = min(t, n - t)
    right = max(t, n - t)

    # Create subplot with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Histogram (primary y-axis)
    fig.add_trace(
        go.Histogram(x=weights_sample, nbinsx=30, name="Weights"),
        secondary_y=False,
    )
    # FAR and FRR (secondary y-axis)
    fig.add_trace(
        go.Scatter(x=thresholds, y=fars, name="FAR", mode="lines"),
        secondary_y=True,
    )
    fig.add_trace(
        go.Scatter(x=thresholds, y=frrs, name="FRR", mode="lines"),
        secondary_y=True,
    )
    # Single labelled marker at the equal-error-rate point.
    fig.add_trace(
        go.Scatter(
            x=[eer_th], y=[eer_val],
            name="EER", mode="markers+text",
            text=[f"{eer_val:.2f}%"], textposition="top right"
        ),
        secondary_y=True,
    )

    # Add vertical line and shaded region. yref="paper" makes y0/y1 fractions
    # of the plot height, so both shapes span the full vertical extent.
    fig.add_shape(
        name="Non-encrypted region",
        type="line",
        x0=eer_th, x1=eer_th,
        y0=0, y1=1,
        yref="paper",
        line=dict(dash="dash"),
    )
    fig.add_shape(
        type="rect",
        x0=left, x1=right,
        y0=0, y1=1,
        yref="paper",
        fillcolor="rgba(200,0,0,0.2)",
        line=dict(width=0),
        layer="below"
    )

    fig.update_layout(
        title=f"Hamming Weights Distribution (security= {(eps*n/2)}, k = {k}, n = {n})",
        xaxis_title="Umbral de Hamming",
        yaxis_title="Histogram",
        yaxis2_title="FAR/FRR (%)",
        legend=dict(x=0.01, y=0.99),
        template="plotly_white"
    )
    # Replace the previous figure; wait=True defers the clear until the new
    # output is ready.
    with out:
        clear_output(wait=True)
        fig.show()

Personas seleccionadas para pares genuinos: 1272


Generando pares genuinos: 100%|██████████| 1272/1272 [00:00<00:00, 8889.58it/s]
Generando pares impostores: 100%|██████████| 100000/100000 [00:00<00:00, 129558.95it/s]


Gereating figure...
