In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import wandb
from wandb.integration.keras import WandbCallback


In [None]:
# Training callbacks, both keyed on the validation loss:
#   * reduce_lr      - scale the learning rate by 0.2 after 3 epochs without
#                      improvement, never going below 1e-6
#   * early_stopping - stop after 5 epochs without improvement and restore
#                      the best weights seen
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)


In [None]:
# Mount Google Drive at /content/drive (requires a Google Colab runtime).

from google.colab import drive

drive.mount("/content/drive")





In [None]:
# Side length, in pixels, of the square single-channel images fed to the network.
size = 128


# Define the Embedding Model (Feature Extractor)
def build_embedding_model(input_shape):
    """Build the convolutional feature extractor shared by both Siamese branches.

    The network is four Conv2D(3x3, ReLU) + MaxPooling2D stages followed by
    Flatten. For a 128x128x1 input the spatial dimensions shrink to 1x1 after
    the last pooling stage, so the flattened embedding has 512 values.

    Args:
        input_shape: Shape of a single input image without the batch axis,
            e.g. ``(size, size, 1)``.

    Returns:
        A ``tf.keras.Sequential`` model named ``"embedding_model"``.
    """
    return tf.keras.Sequential(
        [
            # Declare the input with an explicit Input layer instead of passing
            # `input_shape=` to the first Conv2D, which is the form Keras
            # recommends for Sequential models.
            tf.keras.layers.Input(shape=input_shape),
            tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
            tf.keras.layers.MaxPooling2D(2, 2),
            tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
            tf.keras.layers.MaxPooling2D(2, 2),
            tf.keras.layers.Conv2D(256, (3, 3), activation="relu"),
            tf.keras.layers.MaxPooling2D(4, 4),
            tf.keras.layers.Conv2D(512, (3, 3), activation="relu"),
            tf.keras.layers.MaxPooling2D(4, 4),
            tf.keras.layers.Flatten(),
        ],
        name="embedding_model",
    )


In [None]:
# Define Inputs for Siamese Network
# One single-channel (size x size) image input per signature in a pair.
input_a = tf.keras.layers.Input(shape=(size, size, 1), name="input1")
input_b = tf.keras.layers.Input(shape=(size, size, 1), name="input2")

# Shared Embedding Model
# The same model instance is applied to both inputs, so the two branches
# share their weights.
embedding_model = build_embedding_model((size, size, 1))
em_one = embedding_model(input_a)
em_two = embedding_model(input_b)

# Merge the two embeddings with an element-wise absolute difference
# (instead of concatenating them).


def abs_diff(tensors):
    """Return the element-wise absolute difference of ``tensors[0]`` and ``tensors[1]``."""
    return tf.abs(tensors[0] - tensors[1])


# `output_shape` reports the first input's shape, i.e. the merged tensor keeps
# the shape of a single embedding.
# NOTE(review): Lambda layers built from Python callables can complicate
# reloading the saved .keras file - confirm the intended load path works.
out = tf.keras.layers.Lambda(
    abs_diff, output_shape=lambda input_shapes: input_shapes[0], name="abs_diff"
)([em_one, em_two])

In [None]:
# Fully Connected Layers for Classification
# Each tensor gets its own name instead of rebinding `out`: with
# `out = Dense(...)(out)` every re-run of this cell would stack another Dense
# block on top of the previous sigmoid output. Reading `out` (the abs-diff
# tensor) without overwriting it keeps the cell idempotent.
head_hidden = tf.keras.layers.Dense(64, activation="relu")(out)
match_probability = tf.keras.layers.Dense(1, activation="sigmoid", name="Output")(
    head_hidden
)

# Create and Compile Model
# Binary cross-entropy on the single sigmoid unit: label 1 = genuine pair,
# label 0 = forged pair (see create_pairs).
model = tf.keras.models.Model([input_a, input_b], match_probability)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [6]:
# Data Loading and Preprocessing
# Root-level "signatures" folder containing the genuine ("full_org") and forged
# ("full_forg") images. The path is assembled with os.sep because the previous
# literal "\signatures" relied on the invalid escape sequence "\s" and only
# meant "folder at the filesystem root" on Windows.
# NOTE(review): point this at wherever the dataset actually lives (for example
# a folder under the mounted Drive) - confirm the location.
data_dir = os.path.join(os.sep, "signatures")
genuine_dir = os.path.join(data_dir, "full_org")
forgery_dir = os.path.join(data_dir, "full_forg")


In [None]:
def load_signature_images(genuine_path, forgery_path, target_size=(128, 128)):
    """Load the genuine and forged signature images as grayscale arrays.

    Files are read in sorted filename order. ``os.listdir`` returns entries in
    arbitrary order, and downstream pairing matches images by index, so a
    stable order is needed for reproducible results. Files that OpenCV cannot
    decode (``cv2.imread`` returns ``None``) are skipped.

    Args:
        genuine_path: Directory containing the genuine signature images.
        forgery_path: Directory containing the forged signature images.
        target_size: Size passed to ``cv2.resize`` for every image.

    Returns:
        A tuple ``(genuine_images, forgery_images)`` of NumPy arrays, each
        holding one resized image per readable file.
    """

    def load_images(path):
        images = []
        for image_file in sorted(os.listdir(path)):
            img = cv2.imread(os.path.join(path, image_file), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                images.append(cv2.resize(img, target_size))
        return np.array(images)

    return load_images(genuine_path), load_images(forgery_path)


In [None]:
# Load both image sets at the network's input resolution. Passing `size`
# explicitly keeps the loader in step with the model's Input layers instead of
# silently relying on the loader's default of (128, 128).
genuine_images, forgery_images = load_signature_images(
    genuine_dir, forgery_dir, target_size=(size, size)
)
# Normalize: scale pixel values by 1/255.
genuine_images, forgery_images = (
    genuine_images / 255.0,
    forgery_images / 255.0,
)


In [None]:
# Create Pairs for Siamese Training
def create_pairs(genuine, forged):
    """Build labelled image pairs for Siamese training.

    For each index ``i`` below ``min(len(genuine), len(forged))`` two pairs are
    emitted, in this order:

    * ``[genuine[i], genuine[i + 1]]`` (wrapping to ``genuine[0]`` at the end),
      labelled 1;
    * ``[genuine[i], forged[i]]``, labelled 0.

    Returns:
        ``(pairs, labels)`` as NumPy arrays.
    """
    genuine_count = len(genuine)
    pair_count = min(genuine_count, len(forged))

    pair_list = []
    label_list = []
    for idx in range(pair_count):
        anchor = genuine[idx]
        neighbour = genuine[(idx + 1) % genuine_count]
        pair_list.extend(([anchor, neighbour], [anchor, forged[idx]]))
        label_list.extend((1, 0))

    return np.array(pair_list), np.array(label_list)


# Build the labelled pairs, then hold out 20% of them as a test set
# (fixed random_state so the split is repeatable).
# NOTE(review): the same genuine image appears in several pairs, so a random
# split over pairs can place one image in both train and test - confirm this
# is acceptable for the reported test metrics.
pairs, labels = create_pairs(genuine_images, forgery_images)
X_train, X_test, y_train, y_test = train_test_split(
    pairs, labels, test_size=0.2, random_state=42
)


In [None]:
# Train the Model - No wandb callbacks at all
# Each pair array is split into its two image stacks, one per model input.
train_inputs = [X_train[:, 0], X_train[:, 1]]
test_inputs = [X_test[:, 0], X_test[:, 1]]

# NOTE(review): these callbacks are created inline rather than reusing the
# `early_stopping` / `reduce_lr` objects defined earlier, and min_lr here
# (0.0001) differs from that earlier definition (1e-6) - confirm which is meant.
# NOTE(review): the held-out test pairs double as validation data, so early
# stopping is driven by the same data later reported as test accuracy.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(test_inputs, y_test),
    epochs=20,
    batch_size=32,
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=3, min_lr=0.0001),
    ],
)
# Evaluate Model
loss, accuracy = model.evaluate(test_inputs, y_test, verbose=0)
predictions = model.predict(test_inputs)
# The single sigmoid unit yields one column per sample; flatten it so the
# thresholded labels are 1-D like y_test and compare element-wise with it.
predicted_labels = (predictions.ravel() > 0.5).astype(int)


In [None]:
# Print the test accuracy and the per-class classification report
print(f"Test Accuracy: {accuracy:.4f}")
print(classification_report(y_test, predicted_labels))
# Destination folder under the mounted Drive; create it if it doesn't exist
model_save_dir = "/content/drive/MyDrive/signature_verification_model(3)"
os.makedirs(model_save_dir, exist_ok=True)

# Save the full Siamese model to Google Drive
model_save_path = os.path.join(model_save_dir, "siamese_signature_model.keras")
model.save(model_save_path)
print(f"Model saved to: {model_save_path}")
wandb.init(project="signature-verification")
# Log results to wandb AFTER training is complete
# This avoids any issues with wandb trying to monitor the model during training
wandb.config.update(
    {
        "model_type": "siamese_network",
        "image_size": size,
        "batch_size": 32,
        # Must mirror Adam(learning_rate=0.001) used when the model was
        # compiled; the previously logged 0.01 did not match the actual run.
        "learning_rate": 0.001,
        "epochs": 20,
    }
)

# Log test metrics
wandb.log(
    {
        "Test Accuracy": accuracy,
        "Test Loss": loss,
    }
)
# Log training history, one wandb step per completed epoch
epoch_metrics = zip(
    history.history["loss"],
    history.history["val_loss"],
    history.history["accuracy"],
    history.history["val_accuracy"],
)
for epoch, (train_loss, val_loss, train_acc, val_acc) in enumerate(epoch_metrics):
    wandb.log(
        {
            "Epoch": epoch,
            "Training Loss": train_loss,
            "Validation Loss": val_loss,
            "Training Accuracy": train_acc,
            "Validation Accuracy": val_acc,
        }
    )
# Log the Model to wandb as an Artifact
artifact = wandb.Artifact("siamese_signature_model", type="model")
# Upload the file written by model.save(); the name `model_path` used here
# before was never defined and raised NameError.
artifact.add_file(model_save_path)
wandb.log_artifact(artifact)
# Save the embedding model separately for later feature extraction
embedding_model_path = "signature_embedding_model.keras"
embedding_model.save(embedding_model_path)
embed_artifact = wandb.Artifact("signature_embedding_model", type="model")
embed_artifact.add_file(embedding_model_path)
wandb.log_artifact(embed_artifact)


In [None]:
# Create a visualization of sample pairs and predictions
def visualize_pairs(X_pairs, y_true, y_pred, num_pairs=5):
    """Plot randomly chosen signature pairs next to their true/predicted labels.

    Each row shows the first image of the pair, the second image, and a text
    panel with the true label, the predicted label and a check/cross mark
    (drawn in red when the prediction is wrong).

    Args:
        X_pairs: Array of image pairs indexed as ``X_pairs[sample, 0 or 1]``.
        y_true: Ground-truth labels; 1 is shown as "Genuine", anything else as
            "Forgery".
        y_pred: Predicted labels, shape ``(N,)`` or ``(N, 1)``.
        num_pairs: Maximum number of pairs to draw. It is clamped to the number
            of available samples.

    Returns:
        The Matplotlib figure containing the grid.

    Raises:
        ValueError: If there are no samples or ``num_pairs`` is less than 1.
    """
    # Flatten so a column vector of predictions compares element-wise with y_true.
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)

    if len(y_true) == 0:
        raise ValueError("visualize_pairs needs at least one sample")
    if num_pairs < 1:
        raise ValueError("num_pairs must be at least 1")
    # Sampling without replacement cannot draw more rows than there are samples.
    num_pairs = min(num_pairs, len(y_true))

    # squeeze=False keeps `axes` two-dimensional even for a single row, so the
    # axes[row, col] indexing below always works.
    fig, axes = plt.subplots(
        num_pairs, 3, figsize=(12, 3 * num_pairs), squeeze=False
    )

    # Get some random samples
    indices = np.random.choice(len(y_true), num_pairs, replace=False)

    for i, idx in enumerate(indices):
        # Get the pair of images
        img1, img2 = X_pairs[idx, 0], X_pairs[idx, 1]
        is_correct = bool(y_true[idx] == y_pred[idx])
        true_label = "Genuine" if y_true[idx] == 1 else "Forgery"
        pred_label = "Genuine" if y_pred[idx] == 1 else "Forgery"
        match = "✓" if is_correct else "✗"

        # Plot the images
        axes[i, 0].imshow(img1, cmap="gray")
        axes[i, 0].set_title("Reference Signature")
        axes[i, 0].axis("off")

        axes[i, 1].imshow(img2, cmap="gray")
        axes[i, 1].set_title("Test Signature")
        axes[i, 1].axis("off")

        # Add a text annotation for the prediction
        axes[i, 2].text(
            0.5,
            0.5,
            f"True: {true_label}\nPred: {pred_label}\n{match}",
            horizontalalignment="center",
            verticalalignment="center",
            fontsize=12,
            color="black" if is_correct else "red",
        )
        axes[i, 2].axis("off")

    plt.tight_layout()
    return fig


# Create the sample-prediction figure from the test pairs and log it to wandb
vis_fig = visualize_pairs(X_test, y_test, predicted_labels)
wandb.log({"Sample Predictions": wandb.Image(vis_fig)})
# Close the figure once it has been handed to wandb
plt.close(vis_fig)


# Function to create a signature verification tool
def create_signature_verifier(embedding_model, threshold=0.5):
    """
    Creates a function that can verify if two signatures match

    The similarity is a normalized L1 score between the two embeddings,
    ``1 - sum(|a - b|) / sum(|a| + |b|)``, which lies in ``[0, 1]`` and equals
    1 for identical embeddings.

    Args:
        embedding_model: The trained embedding model
        threshold: Similarity threshold (higher = more strict)

    Returns:
        A function ``verify_signature(reference_sig, test_sig)`` that returns
        the tuple ``(is_match, similarity)``, where ``is_match`` is True when
        ``similarity > threshold``.
    """

    def _embed(signature):
        # Resize to the network's input size, scale by 1/255 as in training,
        # and add the batch and channel axes the model expects.
        scaled = cv2.resize(signature, (size, size)) / 255.0
        return embedding_model.predict(scaled.reshape(1, size, size, 1))

    def verify_signature(reference_sig, test_sig):
        ref_embedding = _embed(reference_sig)
        test_embedding = _embed(test_sig)

        distance = np.sum(np.abs(ref_embedding - test_embedding))
        scale = np.sum(np.abs(ref_embedding) + np.abs(test_embedding))
        if scale == 0:
            # Both embeddings are all zeros, so the ratio is undefined. The old
            # code produced NaN (and a RuntimeWarning), which compared as "no
            # match"; keep that decision but return a well-defined score.
            return False, 0.0

        similarity = float(1 - distance / scale)
        return bool(similarity > threshold), similarity

    return verify_signature


# Record a final status entry, then close the wandb run
wandb.log({"status": "completed"})
wandb.finish()

print("Training and evaluation completed successfully!")
