<a href="https://colab.research.google.com/github/sartabaz/biometric-fusion/blob/main/Fusion_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Load Libraries

This section imports all the libraries needed to evaluate EfficientNetV2-S deep learning models on fused palmprint and fingerprint features. The imports are grouped by purpose to make them easier to follow.

In [None]:
# Core deep learning frameworks
import tensorflow as tf
from tensorflow.keras import layers, models, applications  # Keras API for model building

# Data manipulation and analysis
import pandas as pd  # Dataframes and CSV handling
import numpy as np   # Numerical operations

# Data visualization
import matplotlib.pyplot as plt  # Basic plotting
import seaborn as sns  # Enhanced visualizations

# Model evaluation metrics
from sklearn.metrics import roc_curve, auc, classification_report  # Classification metrics
from sklearn.preprocessing import LabelEncoder  # For label preprocessing

# Similarity metrics
from scipy.spatial.distance import cosine  # Cosine similarity/distance calculations

1. **TensorFlow/Keras**:
   - Core framework for building neural networks
   - `applications` provides pre-trained models (ResNet, VGG, etc.)

2. **Data Handling**:
   - Pandas for structured data operations
   - NumPy for numerical computations and array operations

3. **Visualization**:
   - Matplotlib for basic plots (accuracy/loss curves)
   - Seaborn for more sophisticated statistical visualizations

4. **Evaluation Metrics**:
   - ROC/AUC for binary classification performance
   - Classification report for precision/recall metrics
   - LabelEncoder for preparing categorical targets

5. **Similarity Metrics**:
   - Cosine distance for comparing feature vectors (you can use other distances)

### Best Practices:
- Keep imports organized by functionality
- Only import what you need to maintain clean namespace
- For Colab, you may need to `!pip install` certain packages first

# 2. Experiment Configuration

This section defines the variables needed for feature fusion.

In [None]:
# Core parameters

# Data Configuration
# NUM_CLASSES and SAMPLES_PER_CLASS are passed to load_features, which reads
# NUM_CLASSES * SAMPLES_PER_CLASS rows from each CSV. NUM_CLASSES is later
# reassigned to the number of distinct labels actually found.
NUM_CLASSES = 140
SAMPLES_PER_CLASS = 12

# Test configuration
# Pair budget handed to compute_verification_metrics (half genuine, half impostor).
NUM_PAIRS=30000

# Path Configuration
# NOTE(review): MODEL_SAVE_PATH is not referenced anywhere in the visible notebook.
MODEL_SAVE_PATH = '/models/checkpoint.keras'  # For ModelCheckpoint
PALM_FEATURES_PATH = 'Palm_features.csv'  # Update this path
FINGER_FEATURES_PATH = 'finger_features.csv'  # Update this path

# 3. Define functions

A concise summary of all the functions, organized by purpose and key details:

In [None]:
# Load and preprocess data from CSV

def load_features(csv_path, num_classes, num_samples):
    """Load a feature matrix and its labels from a CSV file.

    Every column except the last is read as a feature value and the last
    column is read as the class label. Only the first
    ``num_classes * num_samples`` data rows of the file are loaded.

    Args:
        csv_path: Path of the CSV file to read.
        num_classes: Number of classes; also the width of the one-hot labels.
        num_samples: Number of samples per class.

    Returns:
        A tuple ``(X, y_categorical, y_encoded, classes)``:
        ``X`` is a float32 NumPy array of features, ``y_categorical`` holds
        the one-hot labels, ``y_encoded`` the integer labels produced by
        ``LabelEncoder`` and ``classes`` the original label values indexed
        by encoded label.
    """
    df = pd.read_csv(csv_path, nrows=num_classes * num_samples)

    # Last column is the label, everything before it is the feature vector.
    y = df.iloc[:, -1].values
    X = df.iloc[:, :-1].values.astype('float32')

    # Labels are encoded before any numeric conversion so that string or
    # non-contiguous identifiers are handled by LabelEncoder instead of
    # failing on an int32 cast of the raw values.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_categorical = tf.keras.utils.to_categorical(y_encoded, num_classes=num_classes)

    return X, y_categorical, y_encoded, le.classes_

# Compute verification metrics
def compute_verification_metrics(features, labels, num_pairs=1000):
    """Score genuine and impostor pairs with cosine similarity.

    Args:
        features: 2-D array-like (NumPy array, TensorFlow tensor, nested
            list) holding one feature vector per row.
        labels: 1-D array-like with the class label of each row of
            ``features``. Tensors, NumPy arrays and plain lists are accepted.
        num_pairs: Total pair budget. At most ``num_pairs // 2`` genuine
            pairs and exactly ``num_pairs // 2`` impostor pairs are scored.

    Returns:
        ``(genuine_scores, impostor_scores)`` as NumPy arrays of cosine
        similarities (``1 - cosine distance``).

    Raises:
        ValueError: If impostor pairs are requested but fewer than two
            distinct classes are present.
    """
    # Convert once so indexing below is plain NumPy regardless of input type.
    features = np.asarray(features)
    label_values = np.asarray(labels).ravel().tolist()
    half = num_pairs // 2

    # Group row indices by class, keeping first-seen class order.
    class_indices = {}
    for idx, label in enumerate(label_values):
        class_indices.setdefault(label, []).append(idx)

    # Genuine pairs: disjoint neighbours of each class's shuffled index list.
    genuine_pairs = []
    for indices in class_indices.values():
        if len(indices) < 2:
            continue
        # Every class is shuffled even once the quota is met, so the random
        # stream consumed before impostor sampling does not depend on quota.
        np.random.shuffle(indices)
        for k in range(0, len(indices) - 1, 2):
            if len(genuine_pairs) >= half:
                break
            genuine_pairs.append((indices[k], indices[k + 1]))

    # Impostor pairs: two distinct classes, one random sample from each.
    class_list = list(class_indices)
    if half > 0 and len(class_list) < 2:
        raise ValueError("At least two classes are required to build impostor pairs")
    impostor_pairs = []
    while len(impostor_pairs) < half:
        class1, class2 = np.random.choice(class_list, 2, replace=False)
        idx1 = np.random.choice(class_indices[class1])
        idx2 = np.random.choice(class_indices[class2])
        impostor_pairs.append((idx1, idx2))

    genuine_scores = [1 - cosine(features[i], features[j]) for i, j in genuine_pairs]
    impostor_scores = [1 - cosine(features[i], features[j]) for i, j in impostor_pairs]

    return np.array(genuine_scores), np.array(impostor_scores)

# Compute FAR and FRR
def compute_far_frr(genuine_scores, impostor_scores):
    """Sweep 100 evenly spaced thresholds in [0, 1] and measure both error rates.

    Args:
        genuine_scores: Similarity scores of same-class pairs.
        impostor_scores: Similarity scores of different-class pairs.

    Returns:
        ``(far, frr, thresholds)`` where ``far[k]`` is the fraction of
        impostor scores at or above ``thresholds[k]`` and ``frr[k]`` the
        fraction of genuine scores strictly below it.
    """
    thresholds = np.linspace(0, 1, 100)
    n_impostor = len(impostor_scores)
    n_genuine = len(genuine_scores)

    # False acceptance: impostors that reach the threshold.
    far = np.array([np.sum(impostor_scores >= t) / n_impostor for t in thresholds])
    # False rejection: genuine pairs that fall short of the threshold.
    frr = np.array([np.sum(genuine_scores < t) / n_genuine for t in thresholds])

    return far, frr, thresholds

# Plot ROC and FAR/FRR curves
def plot_verification_metrics(genuine_scores, impostor_scores, data='NIR'):
    """Draw a 2x2 summary figure of verification performance and return the EER.

    The panels are the ROC curve, FAR/FRR against threshold, the two score
    densities and the DET curve. The figure is saved as
    ``verification_metrics_<data>.png`` and then shown.

    Args:
        genuine_scores: Similarity scores of same-class pairs.
        impostor_scores: Similarity scores of different-class pairs.
        data: Tag appended to the output file name.

    Returns:
        ``(eer, eer_thresh)``: the mean of FAR and FRR at the threshold where
        they are closest, and that threshold.
    """
    far, frr, thresholds = compute_far_frr(genuine_scores, impostor_scores)

    # ROC: genuine pairs are the positive class, impostors the negative one.
    targets = np.concatenate([np.ones_like(genuine_scores), np.zeros_like(impostor_scores)])
    scores = np.concatenate([genuine_scores, impostor_scores])
    fpr, tpr, _ = roc_curve(targets, scores)
    roc_auc = auc(fpr, tpr)

    # Equal error rate: grid point where FAR and FRR are closest.
    eer_idx = np.argmin(np.abs(far - frr))
    eer_thresh = thresholds[eer_idx]
    eer = (far[eer_idx] + frr[eer_idx]) / 2

    fig = plt.figure(figsize=(15, 10))

    # Panel 1: ROC curve with the chance diagonal.
    ax_roc = fig.add_subplot(2, 2, 1)
    ax_roc.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
    ax_roc.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate (FAR)')
    ax_roc.set_ylabel('True Positive Rate (GAR)')
    ax_roc.set_title('Receiver Operating Characteristic')
    ax_roc.legend(loc="lower right")

    # Panel 2: FAR and FRR as functions of the decision threshold.
    ax_err = fig.add_subplot(2, 2, 2)
    ax_err.plot(thresholds, far, 'b-', label='FAR')
    ax_err.plot(thresholds, frr, 'r-', label='FRR')
    ax_err.axvline(x=eer_thresh, color='g', linestyle='--', label=f'EER Threshold ({eer_thresh:.2f})')
    ax_err.set_xlabel('Similarity Threshold')
    ax_err.set_ylabel('Error Rate')
    ax_err.set_title(f'FAR/FRR Curve (EER = {eer:.4f})')
    ax_err.legend()

    # Panel 3: kernel density estimates of both score populations.
    ax_dist = fig.add_subplot(2, 2, 3)
    sns.kdeplot(genuine_scores, label='Genuine Scores', fill=True, ax=ax_dist)
    sns.kdeplot(impostor_scores, label='Impostor Scores', fill=True, ax=ax_dist)
    ax_dist.axvline(x=eer_thresh, color='g', linestyle='--', label='EER Threshold')
    ax_dist.set_xlabel('Similarity Score')
    ax_dist.set_ylabel('Density')
    ax_dist.set_title('Score Distributions')
    ax_dist.legend()

    # Panel 4: DET curve on log-log axes with the EER point highlighted.
    ax_det = fig.add_subplot(2, 2, 4)
    ax_det.plot(far, frr)
    ax_det.scatter(far[eer_idx], frr[eer_idx], color='red', zorder=10,
                   label=f'EER ({eer:.4f})')
    ax_det.set_xscale('log')
    ax_det.set_yscale('log')
    ax_det.set_xlabel('False Acceptance Rate (FAR)')
    ax_det.set_ylabel('False Rejection Rate (FRR)')
    ax_det.set_title('Detection Error Tradeoff (DET) Curve')
    ax_det.legend()

    plt.tight_layout()
    plt.savefig('verification_metrics_'+data+'.png')
    plt.show()

    return eer, eer_thresh

In [None]:
def split_data_by_indices(X, y_categorical, y_encoded, total, n_train):
    """Split class-grouped rows into training and test sets by position.

    Rows are processed in consecutive groups of ``total`` rows. Within each
    group the first ``n_train`` rows go to the training set and the remaining
    rows to the test set. This matches a per-class split only if the data is
    stored as ``total`` consecutive samples per class (assumption about the
    input layout; the function itself only looks at row positions).

    Args:
        X: Array or tensor of features, one sample per row.
        y_categorical: One-hot labels aligned with ``X``.
        y_encoded: Integer labels aligned with ``X``.
        total: Number of consecutive rows per group (samples per class).
            Must be a positive integer.
        n_train: Number of leading rows of each group used for training.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, y_test_encoded)`` of
        tensors produced by ``tf.gather``.

    Raises:
        TypeError: If ``total`` or ``n_train`` cannot be converted with
            ``int()`` (for example when a ``Counter`` is passed).
        ValueError: If ``total`` is not positive.
    """
    total = int(total)
    n_train = int(n_train)
    if total <= 0:
        # The position-in-group logic below is undefined for empty groups.
        raise ValueError(f"total must be a positive number of samples per class, got {total}")

    # The total number of samples is derived from the shape of the input
    total_samples = int(tf.shape(X)[0].numpy())

    # A row belongs to the training set when its position inside its group
    # of `total` rows is below n_train; every other row is a test row.
    positions = np.arange(total_samples, dtype=np.int32)
    in_train = (positions % total) < n_train

    train_indices_tensor = tf.convert_to_tensor(positions[in_train], dtype=tf.int32)
    test_indices_tensor = tf.convert_to_tensor(positions[~in_train], dtype=tf.int32)

    # Use tf.gather to split the data
    X_train = tf.gather(X, train_indices_tensor)
    y_train = tf.gather(y_categorical, train_indices_tensor)

    X_test = tf.gather(X, test_indices_tensor)
    y_test = tf.gather(y_categorical, test_indices_tensor)
    y_test_encoded = tf.gather(y_encoded, test_indices_tensor)

    return X_train, y_train, X_test, y_test, y_test_encoded

# 4. Apply fusion and test

### **Key Workflow Summary**
1. **Data Prep**:  
   `load_features()` → concatenate  
2. **Model Training**:  
   Train classifiers → print results
3. **Evaluation**:  
   Extract features → `compute_verification_metrics()` → `plot_verification_metrics()`


In [None]:
# Load the features of both modalities; each call returns its own label arrays.
X_palm, y_categorical_palm, y_encoded_palm, class_names_palm = load_features(
    PALM_FEATURES_PATH, NUM_CLASSES, SAMPLES_PER_CLASS
)
X_finger, y_categorical, y_encoded, class_names = load_features(
    FINGER_FEATURES_PATH, NUM_CLASSES, SAMPLES_PER_CLASS
)

# Feature-level fusion concatenates row i of both matrices, so both files must
# list the same identities in the same order. Fail loudly instead of silently
# fusing features of different subjects.
if not np.array_equal(y_encoded_palm, y_encoded):
    raise ValueError(
        "Palm and finger feature files are not aligned: their encoded labels differ"
    )

X = np.concatenate([X_palm, X_finger], axis=-1)

# From here on NUM_CLASSES is the number of distinct labels actually present.
NUM_CLASSES = len(class_names)

In [None]:
from collections import Counter
# y_encoded comes from LabelEncoder.fit_transform and is a NumPy array, which
# has no .numpy() method; np.asarray accepts arrays and tensors alike.
y_encoded_list = np.asarray(y_encoded).tolist()
# Use collections.Counter to count samples per class
class_counts = Counter(y_encoded_list)

In [None]:
# split_data_by_indices does integer arithmetic on `total`, so it needs the
# number of samples per class, not the Counter itself. The positional split is
# only meaningful when every class has the same number of samples.
TRAIN_SAMPLES_PER_CLASS = 10
distinct_counts = set(class_counts.values())
if len(distinct_counts) != 1:
    raise ValueError(f"Classes have unequal sample counts: {sorted(distinct_counts)}")
samples_per_class = distinct_counts.pop()

X_train, y_train, X_test, y_test, y_test_encoded = split_data_by_indices(
    X, y_categorical, y_encoded, samples_per_class, TRAIN_SAMPLES_PER_CLASS
)


In [None]:
# Display the integer-encoded labels as the cell output.
y_encoded

## **Optional: run this cell to test on the whole dataset**

In [None]:
# Evaluate on every sample by stacking the training and test partitions.
# NOTE: run this cell only once per split; re-running stacks X_train again.
X_test = tf.concat([X_train, X_test], axis=0)
y_test = tf.concat([y_train, y_test], axis=0)
# The stacked rows are ordered "all training rows, then all test rows", which
# differs from the file order of y_encoded. Recover the integer labels from
# the stacked one-hot rows so they stay aligned with X_test.
y_test_encoded = tf.argmax(y_test, axis=1, output_type=tf.int32)

## a. Train classifiers on features

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
# accuracy_score and confusion_matrix are used below but were never imported.
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Convert TensorFlow tensors to NumPy arrays for scikit-learn
X_train_np = X_train.numpy()
y_train_encoded_np = np.argmax(y_train.numpy(), axis=1) # Integer class index from the one-hot rows
X_test_np = X_test.numpy()
y_test_encoded_np = np.argmax(y_test.numpy(), axis=1) # Integer class index from the one-hot rows

# Define classifiers
classifiers = {
    "SVM": SVC(probability=True), # probability=True for potential future use with ROC curves
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5)
}

# Per-classifier metrics, keyed by the classifier's display name
results = {}

# Train and evaluate each classifier
for name, clf in classifiers.items():
    print(f"Training {name}...")
    clf.fit(X_train_np, y_train_encoded_np)
    y_pred = clf.predict(X_test_np)
    accuracy = accuracy_score(y_test_encoded_np, y_pred)
    report = classification_report(y_test_encoded_np, y_pred)
    cm = confusion_matrix(y_test_encoded_np, y_pred)

    results[name] = {
        "accuracy": accuracy,
        "report": report,
        "confusion_matrix": cm,
        "predictions": y_pred
    }
    print(f"{name} Accuracy: {accuracy:.4f}")
    print("-" * 30)

## b. Print results

In [None]:
# Report the stored accuracy and classification report of every classifier
section_break = "\n" + "=" * 40 + "\n"
for clf_name in results:
    outcome = results[clf_name]
    print(f"--- Results for {clf_name} ---")
    print(f"Accuracy: {outcome['accuracy']:.4f}")
    print("\nClassification Report:")
    print(outcome['report'])

    print(section_break)

## c. Compute metrics and plot

In [None]:
# Score genuine and impostor pairs drawn from the evaluation set
genuine_scores, impostor_scores = compute_verification_metrics(
    features=X_test, labels=y_test_encoded, num_pairs=NUM_PAIRS
)

# Draw the summary figure, then report the EER and the threshold it occurs at
eer, eer_threshold = plot_verification_metrics(
    genuine_scores=genuine_scores, impostor_scores=impostor_scores
)
print(
    f"Equal Error Rate (EER): {eer:.4f}",
    f"Optimal Threshold: {eer_threshold:.4f}",
    sep="\n",
)