# Signature verification with a Siamese network

In [1]:
import os
import numpy as np
import tensorflow as tf
import itertools
import pandas as pd
from PIL import UnidentifiedImageError
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Lambda, Dropout, BatchNormalization, Activation, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

In [2]:
root_dir = './data/genuine_signatures/'
img_size = (128, 128)  # Standard size every image is resized to
data = []

# File extensions accepted as images (compared case-insensitively)
valid_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

# Each sub-directory of root_dir holds the signatures of one person.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the generated pairs (and the seeded splits that follow) reproducible.
all_person_dirs = sorted(
    os.path.join(root_dir, d)
    for d in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, d))
)

# List every person's signature files once, instead of re-listing the same
# directory for each pair of persons it takes part in.
signatures_by_person = {
    person_path: [
        os.path.join(person_path, s)
        for s in sorted(os.listdir(person_path))
        if s.lower().endswith(valid_extensions)
    ]
    for person_path in all_person_dirs
}

# Positive pairs: every combination of two signatures of the same person
for signatures_paths in signatures_by_person.values():
    for image_1, image_2 in itertools.combinations(signatures_paths, 2):
        data.append({
            'image_1': image_1,
            'image_2': image_2,
            'label': 1  # Similar (same person)
        })

# Negative pairs: every signature of one person against every signature of another
for person_1, person_2 in itertools.combinations(all_person_dirs, 2):
    for image_1, image_2 in itertools.product(signatures_by_person[person_1],
                                              signatures_by_person[person_2]):
        data.append({
            'image_1': image_1,
            'image_2': image_2,
            'label': 0  # Different (different persons)
        })

# Explicit columns keep the frame well-formed even when no pair was found
df = pd.DataFrame(data, columns=['image_1', 'image_2', 'label'])
df.to_csv('signature_pairs.csv', sep=";", index=False)

def load_and_preprocess_image(img_path, target_size):
    """Load one image in grayscale and rescale its pixel values.

    Args:
        img_path: Path of the image file to read.
        target_size: Forwarded to ``load_img`` as ``target_size``.

    Returns:
        The array produced by ``img_to_array`` divided by 255.0 (values in
        [0, 1] for 8-bit images), or ``None`` when the file cannot be read or
        decoded. A message is printed in the failure case.
    """
    try:
        img = load_img(img_path, target_size=target_size, color_mode='grayscale')
    except (UnidentifiedImageError, OSError):
        # UnidentifiedImageError covers files that are not images at all;
        # OSError additionally covers truncated or unreadable files, so one
        # corrupt file is skipped instead of aborting the whole loading pass.
        print(f"Erreur: Impossible de charger l'image {img_path}.")
        return None
    return img_to_array(img) / 255.0  # Scale pixel values by 1/255

pairs = []
labels = []

# Every signature file takes part in many pairs, so each distinct path is
# decoded only once and the resulting array is reused. A value of None marks a
# file that failed to load (its error message is therefore printed once).
image_cache = {}

for row in df.itertuples(index=False):
    for img_path in (row.image_1, row.image_2):
        if img_path not in image_cache:
            image_cache[img_path] = load_and_preprocess_image(img_path, img_size)

    img1 = image_cache[row.image_1]
    img2 = image_cache[row.image_2]

    # Keep the pair only when both images were loaded successfully
    if img1 is not None and img2 is not None:
        pairs.append([img1, img2])
        labels.append(row.label)

# Stack into arrays: pairs[:, 0] is the first image of each pair, pairs[:, 1] the second
pairs = np.array(pairs)
labels = np.array(labels)

# Initial split on the raw (unbalanced) pairs; the balancing cell below re-splits
pairs_train, pairs_val, labels_train, labels_val = train_test_split(pairs, labels, test_size=0.2, random_state=42)
print(f"Value_counts des classes: {df['label'].value_counts()}")

Value_counts des classes: label
0    32411
1     2834
Name: count, dtype: int64


In [3]:
# Boolean masks per class, computed once and reused for counting and selection
is_similar = labels == 1
is_different = labels == 0

num_similar = is_similar.sum()
num_different = is_different.sum()
print(f"Nombre de paires similaires (label 1): {num_similar}")
print(f"Nombre de paires non similaires (label 0): {num_different}")

# Split pairs and labels by class
pairs_0, labels_0 = pairs[is_different], labels[is_different]
pairs_1, labels_1 = pairs[is_similar], labels[is_similar]

# Down-sample the label-0 pairs, without replacement, to the size of the label-1 class
pairs_0_downsampled, labels_0_downsampled = resample(
    pairs_0,
    labels_0,
    replace=False,
    n_samples=len(labels_1),
    random_state=42,
)

# Join both classes back together along the sample axis
pairs_balanced = np.concatenate([pairs_0_downsampled, pairs_1], axis=0)
labels_balanced = np.concatenate([labels_0_downsampled, labels_1])

# Re-split the balanced data into training and validation sets
pairs_train, pairs_val, labels_train, labels_val = train_test_split(
    pairs_balanced,
    labels_balanced,
    test_size=0.2,
    random_state=42,
)

# Check the class distribution and set sizes after balancing
print(pd.Series(labels_balanced).value_counts())

print(f"Nombre total de paires après équilibrage: {len(pairs_balanced)}")
print(f"Nombre de paires d'entraînement après équilibrage: {len(pairs_train)}")
print(f"Nombre de paires de validation après équilibrage: {len(pairs_val)}")

Nombre de paires similaires (label 1): 2834
Nombre de paires non similaires (label 0): 32411
0    2834
1    2834
Name: count, dtype: int64
Nombre total de paires après équilibrage: 5668
Nombre de paires d'entraînement après équilibrage: 4534
Nombre de paires de validation après équilibrage: 1134


In [4]:


def contrastive_loss(y_true, y_pred, margin=1.0):
    """Two-term margin loss, averaged over all elements.

    Per element the loss is
    ``(1 - y_true) * 0.5 * y_pred**2 + y_true * 0.5 * max(margin - y_pred, 0)**2``:
    a target of 0 pulls ``y_pred`` towards zero, a target of 1 pushes
    ``y_pred`` up to at least ``margin``.

    Args:
        y_true: Targets; cast to float32 before use.
        y_pred: Values predicted by the model for each pair.
        margin: Level that ``y_pred`` should reach for targets equal to 1.

    Returns:
        The mean of the per-element losses.

    NOTE(review): the pair-building cell labels same-person pairs with 1 and
    the model output passes through a sigmoid, so ``y_pred`` is presumably
    consumed as a similarity score rather than a raw distance. Confirm that
    this is the intended convention.
    """
    # Cast so integer labels can be mixed with the float predictions
    targets = tf.cast(y_true, tf.float32)
    shortfall = K.maximum(margin - y_pred, 0)
    zero_target_term = (1 - targets) * 0.5 * K.square(y_pred)
    one_target_term = targets * 0.5 * K.square(shortfall)
    return K.mean(zero_target_term + one_target_term)

def create_base_network(input_shape):
    """Build the convolutional network that embeds a single image.

    The network stacks three convolutional stages (64, 128 and 256 filters).
    Each stage applies Conv2D -> BatchNormalization -> ReLU twice, followed by
    2x2 max pooling. Global average pooling, a 512-unit dense layer with 50%
    dropout and a final 128-unit dense layer produce the output.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.

    Returns:
        A Keras ``Model`` mapping an input of ``input_shape`` to a
        128-dimensional vector.
    """
    weight_decay = 1e-4  # L2 factor applied to every conv / dense kernel
    inputs = Input(shape=input_shape)

    features = inputs
    for n_filters in (64, 128, 256):
        # Two Conv -> BatchNorm -> ReLU units, then halve the spatial size
        for _ in range(2):
            features = Conv2D(n_filters, (3, 3), padding='same',
                              kernel_regularizer=l2(weight_decay))(features)
            features = BatchNormalization()(features)
            features = Activation('relu')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Collapse each feature map to a single value
    features = GlobalAveragePooling2D()(features)

    # Fully connected head
    features = Dense(512, activation='relu', kernel_regularizer=l2(weight_decay))(features)
    features = Dropout(0.5)(features)
    embedding = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(features)

    return Model(inputs, embedding)

# One shared embedding network: both inputs go through the same
# base_network instance, so the two branches use the same weights.
input_shape = (128, 128, 1)
base_network = create_base_network(input_shape)

# One input tensor per image of the pair
input_a, input_b = Input(shape=input_shape), Input(shape=input_shape)

# Embed each image with the shared network
processed_a, processed_b = base_network(input_a), base_network(input_b)

def euclidean_distance(vects):
    """Return the Euclidean distance between two batches of vectors.

    Args:
        vects: A pair ``(x, y)`` of tensors; the squared differences are
            summed along axis 1.

    Returns:
        A tensor that keeps the summed axis with size 1 and holds, per row,
        the square root of the summed squared differences.
    """
    left, right = vects
    squared_distance = K.sum(K.square(left - right), axis=1, keepdims=True)
    # Clamp to epsilon so the square root is never taken at exactly zero
    clamped = K.maximum(squared_distance, K.epsilon())
    return K.sqrt(clamped)

# Distance between the two embeddings, computed inside the graph
distance = Lambda(function=euclidean_distance)([processed_a, processed_b])

# A single sigmoid unit maps the distance to a value in (0, 1)
output = Dense(units=1, activation='sigmoid')(distance)

# Full model: two images in, one score out
model = Model(inputs=[input_a, input_b], outputs=output)

In [5]:
# Adam with a learning rate of 1e-4; the model is trained with contrastive_loss
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss=contrastive_loss, metrics=['accuracy'])

# Random geometric transformations applied while training batches are generated.
# NOTE(review): horizontal_flip mirrors the images; confirm that mirrored
# signatures are a desirable augmentation for this task.
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

In [6]:
# Single source of truth for the batch size, used both by the generator and
# to derive the number of steps per epoch.
batch_size = 32

# Stop when val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Keep only the best model seen so far on disk
model_checkpoint = ModelCheckpoint('models/best_model_V5.keras', save_best_only=True)
# Halve the learning rate after 2 epochs without val_loss improvement, down to 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001, verbose=1)

# NOTE(review): per the Keras documentation, when flow() receives a list/tuple
# only its first array is augmented and the remaining arrays are passed through
# unchanged; confirm that augmenting only the first image of each pair is intended.
train_batches = datagen.flow(
    [pairs_train[:, 0], pairs_train[:, 1]],
    labels_train,
    batch_size=batch_size,
)

# Keep the History object so the per-epoch metrics stay available for
# inspection/plotting instead of being lost as a bare cell output.
history = model.fit(
    train_batches,
    steps_per_epoch=len(pairs_train) // batch_size,
    epochs=20,
    validation_data=([pairs_val[:, 0], pairs_val[:, 1]], labels_val),
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 19: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 20/20


<keras.src.callbacks.History at 0x314a3cbd0>

In [7]:
# Evaluate on the validation pairs: first and second image of each pair are fed separately
val_inputs = [pairs_val[:, 0], pairs_val[:, 1]]
val_loss, val_accuracy = model.evaluate(val_inputs, labels_val)

