In [None]:
import numpy as np
import random
import os
import cv2
import itertools
import matplotlib.pyplot as plt

In [None]:
# Target size in pixels that every signature image is resized to before pairing.
width = 200
height = 150
# Number of "signatures_<n>" folders to process (n runs from 1 to num_sig).
num_sig = 55

In [None]:
def make_pairs(genuine, forged):
    """Build a balanced set of (genuine, genuine) and (genuine, forged) pairs.

    Parameters
    ----------
    genuine : sequence of array-like
        Genuine samples. Must support ``len()`` and integer indexing.
    forged : sequence of array-like
        Forged samples. Must support ``len()`` and integer indexing.

    Returns
    -------
    gen_gen_pairs : np.ndarray
        Every unordered combination of two distinct genuine samples,
        one pair per row (first axis = pair, second axis = pair member).
    gen_forg_pairs : np.ndarray
        As many (genuine, forged) pairs as there are rows in
        ``gen_gen_pairs``, drawn without replacement from the full
        genuine x forged cross product so both classes are balanced.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when the cross product holds fewer
        pairs than there are (genuine, genuine) combinations.
    """
    num_genuine = len(genuine)
    num_forged = len(forged)

    # Create (genuine, genuine) pairs.
    gen_gen_pairs = np.array(
        [
            [genuine[first], genuine[second]]
            for first, second in itertools.combinations(range(num_genuine), 2)
        ]
    )

    # Pick the same number of (genuine, forged) pairs as (genuine, genuine)
    # ones in order to balance the data. Sampling flat indices over the whole
    # cross product (starting at 0, so the very first pair is eligible too)
    # avoids materialising every possible pair just to discard most of them.
    num_pairs = gen_gen_pairs.shape[0]
    gen_forg_indices = random.sample(range(num_genuine * num_forged), num_pairs)

    # A flat index maps to (genuine, forged) positions in the same order that
    # itertools.product(genuine, forged) would enumerate them.
    gen_forg_pairs = np.array(
        [
            [genuine[flat // num_forged], forged[flat % num_forged]]
            for flat in gen_forg_indices
        ]
    )

    return gen_gen_pairs, gen_forg_pairs


def prepare_data(genuine, forged, X, y):
    """Fill ``X`` and ``y`` in place with shuffled, labelled sample pairs.

    Parameters
    ----------
    genuine, forged : sequence of array-like
        Samples forwarded unchanged to ``make_pairs``.
    X : np.ndarray
        Output buffer (typically a slice/view of a larger array) that
        receives the shuffled pairs. Its shape must match the concatenated
        pair array.
    y : np.ndarray
        Output buffer that receives the matching labels: 1 for a
        (genuine, genuine) pair, 0 for a (genuine, forged) pair.

    Returns
    -------
    None
        Results are written into ``X`` and ``y``.
    """
    gen_gen_pairs, gen_forg_pairs = make_pairs(genuine, forged)

    all_pairs = np.concatenate((gen_gen_pairs, gen_forg_pairs), axis=0)
    print(f"All pairs shape: {all_pairs.shape}")
    labels = np.concatenate(
        (np.ones(gen_gen_pairs.shape[0]), np.zeros(gen_forg_pairs.shape[0])),
        axis=0,
    )
    print(f"Labels shape: {labels.shape}")

    # Shuffle one index permutation and apply it to both arrays so pairs and
    # labels stay aligned, without building a list of (pair, label) tuples.
    order = list(range(all_pairs.shape[0]))
    random.shuffle(order)

    # Slice assignment writes through to the caller's arrays.
    X[:] = all_pairs[order]
    y[:] = labels[order]

In [None]:
# Root directory holding the "signatures_<n>" sub-folders that the loading loop reads.
parent_path = "/kaggle/input/cedar-signatures/signatures/"

In [None]:
# Each "signatures_<n>" folder is expected to hold this many genuine and this
# many forged images.
samples_per_class = 24
# make_pairs yields C(n, 2) genuine/genuine pairs plus the same number of
# genuine/forged pairs: 2 * n * (n - 1) / 2 = n * (n - 1) = 552 for n = 24.
pairs_per_folder = samples_per_class * (samples_per_class - 1)

# NOTE(review): np.empty defaults to float64, so X needs roughly 14.6 GB for
# 55 folders at 150x200; the pixel values themselves are 8-bit -- consider a
# smaller dtype if downstream code allows it.
X = np.empty((num_sig * pairs_per_folder, 2, height, width))
y = np.empty(num_sig * pairs_per_folder)

for m, ind in enumerate(range(1, num_sig + 1)):

    print(f"Iteration number {m+1}")
    path = parent_path + "signatures_" + str(ind)

    genuine_images = []
    forged_images = []
    # Sorted so the loading order does not depend on the filesystem.
    for image in sorted(os.listdir(path)):
        image_path = os.path.join(path, image)
        if not os.path.isfile(image_path):
            continue

        # The file-name prefix decides the class; anything else is skipped
        # before it is decoded.
        prefix = image.split("_")[0]
        if prefix == "original":
            bucket = genuine_images
        elif prefix == "forgeries":
            bucket = forged_images
        else:
            continue

        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f"Could not read image: {image_path}")
        img = 255 - img  # invert grey levels
        bucket.append(cv2.resize(img, (width, height)))

    # Fail loudly instead of pairing against missing or surplus samples.
    if len(genuine_images) != samples_per_class or len(forged_images) != samples_per_class:
        raise ValueError(
            f"{path}: expected {samples_per_class} genuine and {samples_per_class} "
            f"forged images, found {len(genuine_images)} and {len(forged_images)}"
        )

    genuine = np.stack(genuine_images)
    forged = np.stack(forged_images)

    # prepare_data fills the slices in place; basic slices are views, so the
    # results land directly in X and y.
    start = m * pairs_per_folder
    stop = start + pairs_per_folder
    prepare_data(genuine, forged, X[start:stop], y[start:stop])


print(f"X shape : {X.shape}")
print(f"y shape : {y.shape}")

In [None]:
def visualize(pairs, labels, to_show=6, num_col=3, predictions=None, test=False):
    """Plot randomly chosen sample pairs side by side in a grid.

    Parameters
    ----------
    pairs : np.ndarray
        Array indexed as ``pairs[n][0]`` / ``pairs[n][1]`` giving the two
        images of pair ``n``.
    labels : sequence
        Label of each pair, indexed like ``pairs``.
    to_show : int, default 6
        Requested number of pairs; rounded down to a whole number of rows
        (at least one full row is always drawn).
    num_col : int, default 3
        Number of grid columns.
    predictions : sequence, optional
        Per-pair predictions, read as ``predictions[n][0]``. Required when
        ``test`` is True.
    test : bool, default False
        When True, titles show both the true label and the prediction.

    Raises
    ------
    ValueError
        If ``test`` is True but ``predictions`` is None.
    """
    if test and predictions is None:
        raise ValueError("predictions must be provided when test=True")

    num_row = max(to_show // num_col, 1)
    to_show = num_row * num_col

    # squeeze=False keeps `axes` two-dimensional for every grid size,
    # including a single row or a single cell.
    fig, axes = plt.subplots(num_row, num_col, figsize=(5, 5), squeeze=False)

    for i in range(to_show):
        ax = axes[i // num_col, i % num_col]

        # randrange excludes its upper bound, so n is always a valid index
        # (randint would include pairs.shape[0] itself).
        n = random.randrange(pairs.shape[0])
        print(f"Index to show: {n}")
        ax.imshow(np.concatenate([pairs[n][0], pairs[n][1]], axis=1), cmap="gray")
        ax.set_axis_off()
        if test:
            ax.set_title("True: {} | Pred: {:.5f}".format(labels[n], predictions[n][0]))
        else:
            ax.set_title("Label: {}".format(labels[n]))
    if test:
        plt.tight_layout(rect=(0, 0, 1.9, 1.9), w_pad=0.0)
    else:
        plt.tight_layout(rect=(0, 0, 1.5, 1.5))
    plt.show()

In [None]:
# Show 4 randomly chosen pairs in a single row; the [:-1] slices leave the last sample out.
visualize(X[:-1], y[:-1], to_show = 4, num_col = 4)

In [None]:
# Persist the pair array in NumPy's .npy format.
np.save("/kaggle/working/X.npy", X)

In [None]:
# Persist the label array; row k of y is the label of row k of X.
np.save("/kaggle/working/y.npy", y)