In [27]:
import os
import numpy as np
import cv2
from tqdm import tqdm
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
import itertools
import random
from sklearn.utils import shuffle
from keras import layers
from keras import models
from keras import backend as K

In [None]:
# Switch the working directory to the dataset folder so that the relative
# paths used by the following cells ('train_data', 'siamese.h5', ...) resolve there.
# The path is relative to the current working directory, so running this cell a
# second time in the same kernel looks for './drive/...' inside the dataset
# folder itself.
# NOTE(review): presumably Google Drive is already mounted under ./drive — confirm.
os.chdir('./drive/MyDrive/signature_data_one_shot/')

In [28]:
def get_data(dir):
    '''Collect image file paths below ``dir``, grouped by sub-directory.

    Every sub-directory of ``dir`` forms one group holding the paths of all
    entries it contains. A group whose folder name contains "forg"
    (case-insensitive) is returned among the forged groups, every other group
    among the original ones. Folders and their entries are visited in sorted
    order, so the result is deterministic for a given directory tree.

    Entries of ``dir`` that are not directories (for example a stray
    ``.DS_Store`` file) are skipped instead of crashing ``os.listdir``.
    Hidden directories are still treated as regular groups.

    Args:
        dir: Path of the directory that contains one folder per group. The
            name shadows the ``dir`` builtin; it is kept so existing keyword
            callers keep working.

    Returns:
        A tuple ``(orig, forg)`` of two lists; each element is the list of
        image paths (strings) of one group.
    '''
    orig = []
    forg = []
    for name in tqdm(sorted(os.listdir(dir))):
        group_dir = os.path.join(dir, name)
        if not os.path.isdir(group_dir):
            # Only folders can hold a group of images.
            continue

        images = [os.path.join(group_dir, image_name)
                  for image_name in sorted(os.listdir(group_dir))]

        # The folder name alone decides which kind of group this is.
        if 'forg' in name.lower():
            forg.append(images)
        else:
            orig.append(images)

    return orig, forg

In [29]:
# Gather the grouped image paths of every split: training first, then
# validation, then test. Each call returns an (original, forged) pair.
(train_orig, train_forg), (validation_orig, validation_forg), (test_orig, test_forg) = [
    get_data(split_dir)
    for split_dir in ('train_data', 'validation_data', 'test_data')
]

In [None]:
# Number of groups per split, one line each: original then forged for the
# training, validation and test data.
for groups in (train_orig, train_forg,
               validation_orig, validation_forg,
               test_orig, test_forg):
    print(len(groups))

In [None]:
def euclidean_distance(vects):
    '''Compute the Euclidean distance between two batches of vectors.

    Args:
        vects: A pair ``(x, y)`` of tensors of identical shape.

    Returns:
        The square root of the squared differences summed over axis 1, with
        that axis kept as size 1. The sum is clamped to at least
        ``K.epsilon()`` before the square root is taken.
    '''
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # The derivative of sqrt is infinite at 0, so two identical embeddings
    # would produce NaN gradients during training. Clamp before the root.
    return K.sqrt(K.maximum(sum_square, K.epsilon()))
     

def eucl_dist_output_shape(shapes):
    '''Return the output shape of the distance layer.

    Args:
        shapes: A pair of shapes, one per input of the layer.

    Returns:
        ``(d, 1)`` where ``d`` is the first dimension of the first shape.
    '''
    first_shape, _second_shape = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)

In [None]:
def _load_grayscale_image(path, img_shape):
    '''Read ``path`` as a grayscale image, divide it by 255 and append a channel axis.

    Raises:
        OSError: If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
        ValueError: If the decoded image does not have exactly ``img_shape``.
    '''
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Without this check np.array(None, dtype=float) becomes NaN and is
        # silently broadcast over the whole batch slot.
        raise OSError('cv2.imread could not read image: {!r}'.format(path))
    if img.shape != tuple(img_shape):
        raise ValueError('image {!r} has shape {}, expected {}'.format(
            path, img.shape, tuple(img_shape)))
    img = np.array(img, dtype = np.float64)
    img /= 255
    return img[..., np.newaxis]


def generate_batch(orig_groups, forg_groups, batch_size = 32, img_shape = (268, 650)):
    '''Endlessly yield shuffled batches of image pairs with binary labels.

    For every ``(orig, forg)`` group pair obtained by zipping the two inputs,
    all 2-combinations of ``orig`` are labelled 1 (genuine-genuine) and every
    (original, forged) combination is labelled 0 (genuine-forged). All pairs
    are shuffled together on every pass, so the class ratio inside a batch
    follows the data; it is not forced to be half and half. Pairs left over at
    the end of a pass that do not fill a whole batch are dropped.

    Args:
        orig_groups: List of lists of paths to genuine images, one list per group.
        forg_groups: List of lists of paths to forged images, aligned by
            position with ``orig_groups``.
        batch_size: Number of pairs per yielded batch.
        img_shape: ``(height, width)`` every image must have.

    Yields:
        ``(pairs, targets)`` where ``pairs`` is a list of two float64 arrays of
        shape ``(batch_size, height, width, 1)`` and ``targets`` is a float64
        array of shape ``(batch_size,)``.

    Raises:
        ValueError: If ``batch_size`` is not positive, if fewer than
            ``batch_size`` pairs exist (the generator could never yield), or
            if an image has an unexpected shape.
        OSError: If an image cannot be read.
    '''
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # The set of pairs never changes, so build it once instead of on every pass.
    orig_pairs = []
    forg_pairs = []
    for orig, forg in zip(orig_groups, forg_groups):
        orig_pairs.extend(itertools.combinations(orig, 2))
        forg_pairs.extend(itertools.product(orig, forg))

    all_pairs = orig_pairs + forg_pairs
    all_labels = [1]*len(orig_pairs) + [0]*len(forg_pairs)
    del orig_pairs, forg_pairs

    if len(all_pairs) < batch_size:
        # Otherwise the loop below would spin forever without yielding.
        raise ValueError('only {} pairs available, fewer than batch_size={}'.format(
            len(all_pairs), batch_size))

    batch_shape = (batch_size,) + tuple(img_shape) + (1,)
    while True:
        # Fresh order on every pass; the base lists stay untouched.
        epoch_pairs, epoch_labels = shuffle(all_pairs, all_labels)

        k = 0
        pairs = [np.zeros(batch_shape) for _ in range(2)]
        targets = np.zeros((batch_size,))
        for pair, label in zip(epoch_pairs, epoch_labels):
            pairs[0][k, :, :, :] = _load_grayscale_image(pair[0], img_shape)
            pairs[1][k, :, :, :] = _load_grayscale_image(pair[1], img_shape)
            targets[k] = label
            k += 1
            if k == batch_size:
                yield pairs, targets
                # Allocate new buffers so a consumer holding the previous
                # batch never sees it overwritten.
                k = 0
                pairs = [np.zeros(batch_shape) for _ in range(2)]
                targets = np.zeros((batch_size,))

In [None]:
def create_base_network_signal(input_shape):
    '''Build the convolutional branch used by the Siamese model.

    The branch stacks three blocks of a 3x3 ReLU convolution (32, 64 and 64
    filters, 'same' padding) followed by 2x2 max pooling ('same' padding),
    then flattens and applies two ReLU dense layers of 1024 and 128 units.

    Args:
        input_shape: Shape handed to the first convolution as ``input_shape``.

    Returns:
        The assembled ``models.Sequential`` network.
    '''
    network = models.Sequential()

    for position, filters in enumerate((32, 64, 64)):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': input_shape} if position == 0 else {}
        network.add(layers.Conv2D(filters, (3, 3), activation='relu', padding='same', **extra))
        network.add(layers.MaxPooling2D((2, 2), padding='same'))

    network.add(layers.Flatten())
    for units in (1024, 128):
        network.add(layers.Dense(units, activation='relu'))

    return network

In [None]:
# Assemble the Siamese model: both inputs pass through the same base network
# (shared weights), the distance between the two embeddings is computed, and a
# single sigmoid unit turns that distance into the prediction.
input_shape = (268, 650, 1)
base_network = create_base_network_signal(input_shape)

# One input tensor per image of a pair.
input_a, input_b = [layers.Input(shape=input_shape) for _ in range(2)]

# The same network instance is applied to both inputs.
processed_a, processed_b = [base_network(branch) for branch in (input_a, input_b)]

distance_layer = layers.Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)
distance = distance_layer([processed_a, processed_b])
prediction = layers.Dense(1, activation='sigmoid')(distance)

model = models.Model(inputs=[input_a, input_b], outputs=prediction)

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy on the single
# sigmoid output, accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Hard-coded sample counts used to size an epoch.
# NOTE(review): presumably 66 and 96 are the genuine-genuine and genuine-forged
# pairs per group, and 50 / 7 the number of groups in the training / validation
# data — confirm against the dataset.
pairs_per_group = 66 + 96
num_train_samples = num_test_samples = pairs_per_group * 50
num_val_samples = pairs_per_group * 7
num_train_samples, num_test_samples, num_val_samples

In [None]:
# Train for 4 epochs from the endless pair generators. The step counts decide
# how many batches make up one training epoch and one validation run.
batch_size = 64
train_batches = generate_batch(train_orig, train_forg, batch_size)
validation_batches = generate_batch(validation_orig, validation_forg, batch_size)

history = model.fit(
    train_batches,
    steps_per_epoch=num_train_samples // batch_size,
    epochs=4,
    validation_data=validation_batches,
    validation_steps=num_val_samples // batch_size,
)

In [None]:
# Persist the trained model. The path is relative, so the file is written to
# the current working directory.
model.save('siamese.h5')