# Model Evaluation
This notebook evaluates the model's performance on a separate test set that was not used during training.

In [None]:
# Mount Google Drive at /content/drive; the dataset and saved-model paths
# configured later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import os
import time
import random
from pathlib import Path
import tensorflow as tf
from keras import layers
from keras import losses
from keras import ops
from keras import metrics
from keras import Model
import keras

# Spatial size handed to tf.image.resize in preprocess_image; it also fixes the
# image dimensions declared in the test dataset's output signature.
target_shape = (75, 75)

In [None]:
# Preprocessed dataset saved in Drive to read directly.
#
# Every path below is derived from PROJECT_DIR, so pointing the notebook at
# another copy of the data is a one-line change. An earlier layout used
# '/content/drive/MyDrive/celebA Dataset' as the root (same Matching_triplets
# file names), with the model stored in its 'weights/complete_dataset' folder.
PROJECT_DIR = '/content/drive/MyDrive/DEPI_Project'
TRIPLETS_DIR = f'{PROJECT_DIR}/Matching_triplets'

# Triplet images (anchor / positive / negative) and the anchor labels
anchor_path = f'{TRIPLETS_DIR}/anchor_image.npy'
pos_path = f'{TRIPLETS_DIR}/pos_image.npy'
neg_path = f'{TRIPLETS_DIR}/neg_image.npy'
gender_path = f'{TRIPLETS_DIR}/anchor_gender.npy'
age_path = f'{TRIPLETS_DIR}/anchor_age.npy'

# Directory that holds the saved model files
saved_model_dir = f'{PROJECT_DIR}/weights'

In [None]:

def preprocess_image(image):
    """
    Resize ``image`` to ``target_shape`` and return it as a float32 tensor.
    """
    resized = tf.image.resize(image, target_shape)
    # Explicit dtype conversion so the result is float32 in every case.
    return tf.image.convert_image_dtype(resized, tf.float32)

In [None]:
@keras.saving.register_keras_serializable()
class DistanceLayer(layers.Layer):
    """
    Distance between an anchor embedding and a test-image embedding.

    The test image can be either the positive or the negative sample; the
    layer returns the sum of squared differences along the last axis.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, test):
        # Squared element-wise difference, then reduced over the last axis.
        squared_diff = tf.square(anchor - test)
        return ops.sum(squared_diff, axis=-1)


In [None]:
def test_dataset_generator(batch_size, mode='test'):
    """
    Generator function to yield batches of triplets for testing.

    The test split is made of the ``test_samples`` rows that come right after
    the first ``num_used_samples_train`` rows of each saved array. Batches are
    read sequentially and never overlap, so each yielded sample appears exactly
    once and repeated runs evaluate exactly the same data. Rows left over after
    the last full batch are dropped, which keeps every batch at exactly
    ``batch_size`` rows.

    Args:
        batch_size: Number of triplets in every yielded batch.
        mode: Unused; kept so existing callers keep working.

    Yields:
        Tuple ``(anchor_batch, pos_batch, neg_batch, gender_batch, age_batch)``
        of NumPy arrays, each holding ``batch_size`` rows.
    """
    num_used_samples_train = 15000
    test_samples = 3000
    # Derive the end of the split from the two constants above instead of
    # repeating a literal that could drift out of sync with them.
    test_split = slice(num_used_samples_train, num_used_samples_train + test_samples)

    # Memory-mapped loading: only the rows that are indexed below are read.
    anchor_images = np.load(anchor_path, mmap_mode='r')[test_split]
    positive_images = np.load(pos_path, mmap_mode='r')[test_split]
    negative_images = np.load(neg_path, mmap_mode='r')[test_split]
    anchor_gender = np.load(gender_path, mmap_mode='r')[test_split]
    anchor_age = np.load(age_path, mmap_mode='r')[test_split]

    # Use the number of rows actually available in case the saved arrays are
    # shorter than the requested split.
    total_batches = len(anchor_images) // batch_size

    for batch_number in range(total_batches):
        first_row = batch_number * batch_size
        batch_rows = range(first_row, first_row + batch_size)

        anchor_batch = np.array([preprocess_image(anchor_images[row]) for row in batch_rows])
        pos_batch = np.array([preprocess_image(positive_images[row]) for row in batch_rows])
        neg_batch = np.array([preprocess_image(negative_images[row]) for row in batch_rows])
        gender_batch = np.array([anchor_gender[row] for row in batch_rows])
        age_batch = np.array([anchor_age[row] for row in batch_rows])

        yield anchor_batch, pos_batch, neg_batch, gender_batch, age_batch



# Number of triplets per batch
BATCH_SIZE = 256

# Wrap the generator in a tf.data pipeline and prefetch batches for efficiency
test_dataset = tf.data.Dataset.from_generator(
    lambda: test_dataset_generator(BATCH_SIZE, mode='test'),
    output_signature=(
        # anchor, positive and negative image batches
        (tf.TensorSpec(shape=(BATCH_SIZE, *target_shape, 3), dtype=tf.float32),) * 3
        # gender and age label batches
        + (tf.TensorSpec(shape=(BATCH_SIZE, ), dtype=tf.float32),) * 2
    ),
).prefetch(tf.data.AUTOTUNE)

In [None]:
# One BinaryAccuracy tracker per evaluated quantity
(test_age_acc_metric,
 test_gender_acc_metric,
 test_pos_acc_metric,
 test_neg_acc_metric) = (tf.keras.metrics.BinaryAccuracy() for _ in range(4))

# Load the complete saved model from the weights directory
complete_model = tf.keras.models.load_model(
    os.path.join(saved_model_dir, 'model_epoch_30.keras')
)

# Remove the negative image branch from the model: keep only the first two
# inputs and the first three outputs of the loaded model
inference_model = Model(inputs=complete_model.inputs[:2],
                        outputs=complete_model.outputs[:3])

In [None]:
def eval_model(inference_model, dataset, num_of_batches=None, dist_threshold=1):
  '''
  Evaluate the model performance on a test dataset.

  Each batch is passed through the model twice: once as (anchor, positive) and
  once as (anchor, negative). A pair is counted as "same identity" when its
  distance is below ``dist_threshold``.

  The module-level metrics are reset and then updated in place:
      test_age_acc_metric     age accuracy
      test_gender_acc_metric  gender accuracy
      test_pos_acc_metric     positive verification accuracy
      test_neg_acc_metric     negative verification accuracy

  Args:
    inference_model: model called as ``model([anchor, other], training=False)``
      and returning ``(gender_output, age_output, distance)``.
    dataset: iterable of ``(anchor, pos, neg, gender, age)`` batches.
    num_of_batches: maximum number of batches to evaluate; ``None`` (default)
      consumes the whole dataset, so pass a number for an endless dataset.
    dist_threshold: distance below which a pair is accepted. Can be tuned.

  Returns:
    ``(age_predictions, age_labels, gender_predictions, gender_labels)``, four
    lists with one entry per evaluated batch.
  '''
  import itertools

  # Start from clean metrics so that re-running the evaluation does not mix in
  # the batches accumulated by a previous run.
  for metric in (test_age_acc_metric, test_gender_acc_metric,
                 test_pos_acc_metric, test_neg_acc_metric):
    metric.reset_state()

  age_predictions, age_labels = [], []
  gender_predictions, gender_labels = [], []

  # islice with stop=None walks the entire dataset and never over-fetches.
  for batch in itertools.islice(dataset, num_of_batches):
    anchor_batch, pos_batch, neg_batch, gender_batch, age_batch = batch
    gender_output, age_output, ap_distance = inference_model([anchor_batch, pos_batch], training=False)
    _, _, an_distance = inference_model([anchor_batch, neg_batch], training=False)

    age_predictions.append(age_output)
    age_labels.append(age_batch)
    gender_predictions.append(gender_output)
    gender_labels.append(gender_batch)

    # 1.0 where the pair is accepted as a match, 0.0 otherwise. Reshaping with
    # -1 keeps this independent of the batch size.
    pos_is_match = tf.cast(tf.reshape(ap_distance, [-1, 1]) < dist_threshold, tf.float32)
    neg_is_match = tf.cast(tf.reshape(an_distance, [-1, 1]) < dist_threshold, tf.float32)

    # Update metrics
    test_age_acc_metric.update_state(tf.reshape(age_batch, [-1, 1]), age_output)
    test_gender_acc_metric.update_state(tf.reshape(gender_batch, [-1, 1]), gender_output)

    # Positive pairs should all match (label 1), negative pairs should not (label 0).
    test_pos_acc_metric.update_state(tf.ones_like(pos_is_match), pos_is_match)
    test_neg_acc_metric.update_state(tf.zeros_like(neg_is_match), neg_is_match)

  return age_predictions, age_labels, gender_predictions, gender_labels

In [None]:
# Evaluate on the test dataset; eval_model also updates the four metrics
(age_predictions, age_labels,
 gender_predictions, gender_labels) = eval_model(inference_model, test_dataset)

# Read the accumulated accuracy out of each metric
test_age_acc, test_gender_acc, test_pos_acc, test_neg_acc = (
    metric.result()
    for metric in (test_age_acc_metric, test_gender_acc_metric,
                   test_pos_acc_metric, test_neg_acc_metric)
)


In [None]:
# Show the accuracies in order: age, gender, positive verification, negative verification
test_age_acc, test_gender_acc, test_pos_acc , test_neg_acc

(<tf.Tensor: shape=(), dtype=float32, numpy=0.77781725>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.83658856>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.6940696>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.5995206>)