In [1]:
from sklearn.datasets import fetch_lfw_people
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import random
from tensorflow.keras import layers, Model, regularizers
from tensorflow.keras.layers import BatchNormalization, Dropout
import tensorflow.keras.backend as K
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [None]:
np.random.seed(42)
tf.random.set_seed(42)

# **Data Collection**

In [None]:

lfw = fetch_lfw_people(color=True, min_faces_per_person=3)  
X = lfw.images
y = lfw.target
names = lfw.target_names
print(X.shape)        
print(len(names))     
print(set(y))         
print(names[y[0]])    

(7606, 62, 47, 3)
901
{np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36), np.int64(37), np.int64(38), np.int64(39), np.int64(40), np.int64(41), np.int64(42), np.int64(43), np.int64(44), np.int64(45), np.int64(46), np.int64(47), np.int64(48), np.int64(49), np.int64(50), np.int64(51), np.int64(52), np.int64(53), np.int64(54), np.int64(55), np.int64(56), np.int64(57), np.int64(58), np.int64(59), np.int64(60), np.int64(61), np.int64(62), np.int64(63), np.int64(64), np.int64(65), np.int64(66), np.int64(67), np.int64(68), np.int64(69), np.int6

# **Preprocessing**

In [None]:

X = lfw.images.astype("float32") / 255.0
X_resized = np.array([tf.image.resize(img, (32, 32)).numpy() for img in X])  


X_train, X_test, y_train, y_test = train_test_split(
    X_resized, y, test_size=0.2, stratify=y, random_state=42
)

print("Train set:", X_train.shape, "Test set:", X_test.shape)

Train set: (6084, 32, 32, 3) Test set: (1522, 32, 32, 3)


# **Generate Triplets**

In [None]:
def generate_triplets(X, y, num_triplets=5000):
    triplets = []
    labels = np.unique(y)

    
    class_to_indices = {}
    for c in labels:
        class_to_indices[c] = np.where(y == c)[0]

    
    valid_classes = [c for c in labels if len(class_to_indices[c]) >= 2]

    for _ in range(num_triplets):
        anchor_label = random.choice(valid_classes)
        anchor_idx, pos_idx = np.random.choice(class_to_indices[anchor_label], 2, replace=False)

        
        neg_classes = [c for c in valid_classes if c != anchor_label]
        neg_label = random.choice(neg_classes)
        neg_idx = np.random.choice(class_to_indices[neg_label])

        triplets.append((X[anchor_idx], X[pos_idx], X[neg_idx]))

    return np.array(triplets)


train_triplets = generate_triplets(X_train, y_train, num_triplets=30000)
print("Train triplets:", train_triplets.shape)


Train triplets: (30000, 3, 32, 32, 3)


# **CNN**

In [6]:
def embedding_model(input_shape=(32, 32, 3), embedding_dim=128):
    inputs = layers.Input(shape=input_shape)

    # Block 1
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = BatchNormalization()(x)
    x = layers.MaxPooling2D()(x)
    x = Dropout(0.2)(x)

    # Block 2
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = layers.MaxPooling2D()(x)
    x = Dropout(0.3)(x)

    # Block 3
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = layers.MaxPooling2D()(x)
    x = Dropout(0.4)(x)

    # Dense embedding
    x = layers.Flatten()(x)
    x = layers.Dense(
        256,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001)
    )(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    x = layers.Dense(embedding_dim)(x)

    # L2-normalized output
    outputs = layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1))(x)

    return Model(inputs, outputs)

embedder = embedding_model()
embedder.summary()

# **Triplet loss**

In [None]:
def triplet_loss(margin=0.5):
    def loss(y_true, y_pred):
        anchor, positive, negative = y_pred[:, 0, :], y_pred[:, 1, :], y_pred[:, 2, :]
        pos_dist = K.sum(K.square(anchor - positive), axis=1)
        neg_dist = K.sum(K.square(anchor - negative), axis=1)
        return K.mean(K.maximum(pos_dist - neg_dist + margin, 0))
    return loss

In [None]:
def build_triplet_model(embedder, input_shape=(32,32, 3)):
    anchor_input = layers.Input(input_shape)
    positive_input = layers.Input(input_shape)
    negative_input = layers.Input(input_shape)

    anchor_emb = embedder(anchor_input)
    pos_emb = embedder(positive_input)
    neg_emb = embedder(negative_input)

    merged = layers.Concatenate(axis=1)([anchor_emb, pos_emb, neg_emb])

    # Reshape to have a third dimension
    output = layers.Reshape((3, embedder.output_shape[-1]))(merged)

    model = Model([anchor_input, positive_input, negative_input], output)
    return model 

In [9]:
triplet_model = build_triplet_model(embedder)
triplet_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=triplet_loss(margin=0.5))

anchors = train_triplets[:, 0]
positives = train_triplets[:, 1]
negatives = train_triplets[:, 2]

# Generate validation triplets
val_triplets = generate_triplets(X_test, y_test, num_triplets=5000)
val_anchors = val_triplets[:, 0]
val_positives = val_triplets[:, 1]
val_negatives = val_triplets[:, 2]

# Add callbacks
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)



# **Train the model**

In [None]:

history = triplet_model.fit(
    [anchors, positives, negatives],
    np.zeros(len(anchors)),  # dummy labels
    batch_size=32,
    epochs=30,
    validation_data=([val_anchors, val_positives, val_negatives], np.zeros(len(val_anchors))),
    callbacks=[reduce_lr, early_stopping],
    verbose=1
)

Epoch 1/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m333s[0m 347ms/step - loss: 0.8745 - val_loss: 0.7654 - learning_rate: 1.0000e-04
Epoch 2/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m400s[0m 366ms/step - loss: 0.7324 - val_loss: 0.6642 - learning_rate: 1.0000e-04
Epoch 3/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 363ms/step - loss: 0.6342 - val_loss: 0.5854 - learning_rate: 1.0000e-04
Epoch 4/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 346ms/step - loss: 0.5615 - val_loss: 0.5127 - learning_rate: 1.0000e-04
Epoch 5/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 338ms/step - loss: 0.5015 - val_loss: 0.4634 - learning_rate: 1.0000e-04
Epoch 6/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 355ms/step - loss: 0.4469 - val_loss: 0.4233 - learning_rate: 1.0000e-04
Epoch 7/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 350ms/ste

# **Evaluation**

In [None]:

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Triplet Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

In [11]:
def evaluate_verification(embedder, X, y, num_pairs=1000, threshold=0.7):
    pairs = []
    labels = []

    unique_classes = np.unique(y)
    # Precompute indices for each class
    class_to_indices = {c: np.where(y == c)[0] for c in unique_classes}

    # Filter classes with at least 2 samples
    valid_classes = [c for c in unique_classes if len(class_to_indices[c]) >= 2]

    for _ in range(num_pairs):
        if np.random.rand() > 0.5:  # positive pair
            c = np.random.choice(valid_classes)
            a, b = np.random.choice(class_to_indices[c], 2, replace=False)
            pairs.append((X[a], X[b]))
            labels.append(1)
        else:  # negative pair
            c1, c2 = np.random.choice(valid_classes, 2, replace=False)
            a = np.random.choice(class_to_indices[c1])
            b = np.random.choice(class_to_indices[c2])
            pairs.append((X[a], X[b]))
            labels.append(0)

    X1 = np.array([p[0] for p in pairs])
    X2 = np.array([p[1] for p in pairs])
    y_true = np.array(labels)

    emb1 = embedder.predict(X1, verbose=0)
    emb2 = embedder.predict(X2, verbose=0)

    distances = np.linalg.norm(emb1 - emb2, axis=1)
    y_pred = (distances < threshold).astype(int)

    acc = accuracy_score(y_true, y_pred)

    # Calculate precision and recall
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

    return acc, precision, recall, distances, y_true

# Evaluate with different thresholds to find the optimal one
thresholds = np.arange(0.5, 1.0, 0.05)
best_threshold = 0.7
best_acc = 0

for threshold in thresholds:
    train_acc, train_precision, train_recall, _, _ = evaluate_verification(embedder, X_train, y_train, num_pairs=2000, threshold=threshold)
    if train_acc > best_acc:
        best_acc = train_acc
        best_threshold = threshold

print(f"Best threshold: {best_threshold:.2f}")

Best threshold: 0.95


In [None]:
#  with the best threshold
train_acc, train_precision, train_recall, _, _ = evaluate_verification(embedder, X_train, y_train, num_pairs=2000, threshold=best_threshold)
test_acc, test_precision, test_recall, _, _ = evaluate_verification(embedder, X_test, y_test, num_pairs=2000, threshold=best_threshold)

print(f"Verification Accuracy (Train): {train_acc:.4f}, Precision: {train_precision:.4f}, Recall: {train_recall:.4f}")
print(f"Verification Accuracy (Test): {test_acc:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}")

Verification Accuracy (Train): 0.9065, Precision: 0.9183, Recall: 0.8873
Verification Accuracy (Test): 0.7760, Precision: 0.8708, Recall: 0.6352


# **Save The Model**

In [None]:

embedder.save('lfw_embedding_model.h5')
print("Model saved as 'lfw_embedding_model.h5'")



Model saved as 'lfw_embedding_model.h5'


In [15]:
embedder.save_weights("lfw_embedding_model.weights.h5")

In [16]:
embedder.save("lfw_embedding_model.keras")