In [3]:
import os
import glob
import numpy as np
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay
)
from sklearn.calibration import calibration_curve

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt
import pickle

In [4]:
# =========================================================
# 1. LOAD DATASET
# =========================================================
def load_palm_vein_dataset(
    data_root="Multispectral Palmprint Database/NIR",
    img_size=(256, 256),
    num_persons=20
):
    """
    Loads grayscale palm images (normalized to [0,1]) and labels per person.
    Returns:
        X : (num_samples, H*W) float32
        y : (num_samples,) int64
        label_names : list of folder names (e.g. "0001", "0002", ...)
    """
    X = []
    y = []
    label_names = []

    # get sorted subfolders, pick first num_persons
    all_dirs = sorted(
        d for d in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, d))
    )
    person_dirs = all_dirs[:num_persons]
    print(f"Using persons: {person_dirs}")

    for label_idx, person in enumerate(person_dirs):
        label_names.append(person)
        folder = os.path.join(data_root, person)

        img_paths = []
        for ext in ("*.bmp", "*.png", "*.jpg", "*.jpeg", "*.tif", "*.tiff"):
            img_paths.extend(glob.glob(os.path.join(folder, ext)))

        if not img_paths:
            print(f"Warning: no images found in {folder}")
            continue

        for path in img_paths:
            img = Image.open(path).convert("L")
            img = img.resize(img_size)
            arr = np.array(img, dtype=np.float32)
            arr /= 255.0  # normalization [0,1]
            X.append(arr.flatten())
            y.append(label_idx)

    X = np.stack(X).astype(np.float32)
    y = np.array(y, dtype=np.int64)
    return X, y, label_names

In [5]:
# =========================================================
# 2. MAIN PIPELINE
# =========================================================
# 2.1 Load data
X, y, label_names = load_palm_vein_dataset(
    data_root="Multispectral Palmprint Database/NIR",
    img_size=(256, 256),
    num_persons=30
)
print("X shape:", X.shape)
print("y shape:", y.shape)
print("#classes:", len(label_names))

# 2.2 Train/test split
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
print("Train size:", X_train_raw.shape[0], "Test size:", X_test_raw.shape[0])

# 2.3 Standardization (zero-mean, unit-variance per feature)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# 2.4 PCA
n_components = 150
pca = PCA(
    n_components=n_components,
    whiten=True,
    random_state=42
)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print("X_train_pca shape:", X_train_pca.shape)
print("X_test_pca shape:", X_test_pca.shape)

num_classes = len(label_names)


Using persons: ['0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009', '0010', '0011', '0012', '0013', '0014', '0015', '0016', '0017', '0018', '0019', '0020', '0021', '0022', '0023', '0024', '0025', '0026', '0027', '0028', '0029', '0030']
X shape: (360, 65536)
y shape: (360,)
#classes: 30
Train size: 288 Test size: 72
X_train_pca shape: (288, 150)
X_test_pca shape: (72, 150)


In [6]:
# =========================================================
# 3. BUILD MLP MODEL (TensorFlow/Keras)
# =========================================================
def build_mlp_model(input_dim, num_classes):
    model = keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(num_classes, activation="softmax")
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model


model = build_mlp_model(input_dim=n_components, num_classes=num_classes)
model.summary()


In [None]:
# =========================================================
# 4. TRAIN MODEL
# =========================================================
batch_size = 32
epochs = 50

early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",      # you could also use "val_accuracy"
    patience=3,              # epochs with no improvement before stopping
    restore_best_weights=True
)

history = model.fit(
    X_train_pca, y_train,
    validation_data=(X_test_pca, y_test),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stop],
    verbose=2
)

# =========================================================
# 5. SAVE MODEL + PCA + SCALER
# =========================================================
# save keras model
model_save_path = "palm_mlp_pca_tf.h5"
model.save(model_save_path)
print(f"TensorFlow model saved to: {model_save_path}")

# save scaler, pca, label names (for later inference)
meta_save_path = "palm_pca_scaler_labelmeta.pkl"
with open(meta_save_path, "wb") as f:
    pickle.dump(
        {
            "scaler": scaler,
            "pca": pca,
            "label_names": label_names
        },
        f
    )
print(f"Scaler/PCA/label_names saved to: {meta_save_path}")

Epoch 1/50
9/9 - 0s - 20ms/step - accuracy: 1.0000 - loss: 0.0055 - val_accuracy: 0.9722 - val_loss: 0.1338
Epoch 2/50
9/9 - 0s - 14ms/step - accuracy: 1.0000 - loss: 0.0053 - val_accuracy: 0.9722 - val_loss: 0.1319
Epoch 3/50
9/9 - 0s - 15ms/step - accuracy: 1.0000 - loss: 0.0051 - val_accuracy: 0.9722 - val_loss: 0.1302
Epoch 4/50
9/9 - 0s - 23ms/step - accuracy: 1.0000 - loss: 0.0049 - val_accuracy: 0.9722 - val_loss: 0.1286
Epoch 5/50
9/9 - 0s - 13ms/step - accuracy: 1.0000 - loss: 0.0048 - val_accuracy: 0.9722 - val_loss: 0.1270
Epoch 6/50
9/9 - 0s - 15ms/step - accuracy: 1.0000 - loss: 0.0046 - val_accuracy: 0.9722 - val_loss: 0.1253
Epoch 7/50
9/9 - 0s - 17ms/step - accuracy: 1.0000 - loss: 0.0044 - val_accuracy: 0.9722 - val_loss: 0.1236
Epoch 8/50
9/9 - 0s - 14ms/step - accuracy: 1.0000 - loss: 0.0043 - val_accuracy: 0.9722 - val_loss: 0.1223
Epoch 9/50
9/9 - 0s - 14ms/step - accuracy: 1.0000 - loss: 0.0041 - val_accuracy: 0.9722 - val_loss: 0.1209
Epoch 10/50
9/9 - 0s - 14ms/



TensorFlow model saved to: palm_mlp_pca_tf.h5
Scaler/PCA/label_names saved to: palm_pca_scaler_labelmeta.pkl


In [4]:
# =========================================================
# 6. EVALUATE
# =========================================================
# Final train loss & accuracy from history
final_train_loss = history.history["loss"][-1]
final_train_acc = history.history["accuracy"][-1]

# Evaluate on test set
test_loss, test_acc = model.evaluate(X_test_pca, y_test, verbose=0)

print("\n================ FINAL METRICS ================")
print(f"Train Loss:     {final_train_loss:.4f}")
print(f"Train Accuracy: {final_train_acc:.4f}")
print(f"Test  Loss:     {test_loss:.4f}")
print(f"Test  Accuracy: {test_acc:.4f}")
print("================================================\n")

# Predictions for detailed metrics
y_test_proba = model.predict(X_test_pca)
y_test_pred = np.argmax(y_test_proba, axis=1)

print("Classification report (test set):")
print(classification_report(y_test, y_test_pred, target_names=label_names))

cm = confusion_matrix(y_test, y_test_pred)
print("Confusion matrix:")
print(cm)

NameError: name 'history' is not defined

In [2]:
# =========================================================
# 7. PLOTS
# =========================================================

# 7.1 Train vs Test loss curve
plt.figure(figsize=(6, 4))
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Train vs Test Loss")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 7.1b (optional) Train vs Test accuracy curve
plt.figure(figsize=(6, 4))
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Train vs Test Accuracy")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 7.2 Confusion matrix plot
plt.figure(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names)
disp.plot(cmap="Blues", xticks_rotation=45, values_format="d")
plt.title("Confusion Matrix - Test Set")
plt.tight_layout()
plt.show()

# 7.3 Calibration curve for one class
# Explanation:
# For class k, we compare predicted prob p_k with the true fraction of samples
# that really belong to class k, across probability bins.
class_id = 0  # e.g., first person in label_names
y_true_binary = (y_test == class_id).astype(int)
y_prob_class = y_test_proba[:, class_id]

prob_true, prob_pred = calibration_curve(
    y_true_binary, y_prob_class, n_bins=10, strategy="uniform"
)

plt.figure(figsize=(6, 6))
plt.plot(prob_pred, prob_true, "s-", label=f"Class {label_names[class_id]}")
plt.plot([0, 1], [0, 1], "k--", label="Perfectly calibrated")
plt.xlabel("Mean predicted probability")
plt.ylabel("Fraction of positives")
plt.title("Calibration Curve (Reliability Diagram)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 7.4 PCA cumulative explained variance plot
explained_var_ratio = pca.explained_variance_ratio_
cum_explained = np.cumsum(explained_var_ratio) * 100.0  # %

plt.figure(figsize=(6, 4))
plt.plot(range(1, len(cum_explained) + 1), cum_explained, marker="o")
plt.axhline(95, color="r", linestyle="--", label="95% variance")
plt.xlabel("Number of Principal Components")
plt.ylabel("Cumulative Explained Variance (%)")
plt.title("PCA Cumulative Explained Variance")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

NameError: name 'plt' is not defined

In [None]:
# =========================================================
# 8. VERIFICATION (single image)
# =========================================================
def verify_image_tf(
    model,
    scaler,
    pca,
    img_path,
    claimed_person_name,
    label_names,
    img_size=(256, 256),
    threshold=None
):
    """
    Verify if the image at img_path belongs to claimed_person_name.
    Uses the same preprocessing (normalize -> scaler -> PCA) as training.

    Returns:
        is_match (bool),
        predicted_name (str),
        proba (float)   # predicted probability of the predicted class
    """
    # 1) Load + preprocess image
    img = Image.open(img_path).convert("L").resize(img_size)
    arr = np.array(img, dtype=np.float32) / 255.0        # [H, W] in [0,1]
    x_raw = arr.flatten().reshape(1, -1)                 # (1, 65536)

    # 2) Apply same scaler + PCA as training
    x_scaled = scaler.transform(x_raw)                   # (1, 65536)
    x_pca = pca.transform(x_scaled)                      # (1, n_components)

    # 3) Predict with Keras model (outputs softmax probabilities)
    y_proba = model.predict(x_pca, verbose=0)            # (1, num_classes)
    proba_vec = y_proba[0]
    pred_class = int(np.argmax(proba_vec))
    pred_name = label_names[pred_class]
    proba = float(proba_vec[pred_class])

    # 4) Verification rule
    if threshold is not None:
        is_match = (pred_name == claimed_person_name) and (proba >= threshold)
    else:
        is_match = (pred_name == claimed_person_name)

    return is_match, pred_name, proba


# ---- TEST VERIFICATION ----
test_img_path = "Multispectral Palmprint Database/NIR/0003/2_05.jpg"

is_match, pred_name, prob = verify_image_tf(
    model=model,
    scaler=scaler,
    pca=pca,
    img_path=test_img_path,
    claimed_person_name="0003",   # claimed identity
    label_names=label_names,
    threshold=0.7                 # adjust threshold if needed
)

print("Match:", is_match)
print("Predicted class:", pred_name)
print("Probability:", prob)
