In [2]:
import random
import numpy as np
import keras
import wandb
from wandb.keras import WandbCallback
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Concatenate, Dot, Lambda, Input
from keras.datasets import mnist
from keras.optimizers import Adam
import matplotlib.pyplot as plt

In [3]:
# Load the MNIST train/test splits, then cast the images to float32 and
# divide by 255 in a single expression per split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [4]:
# Build labelled sample pairs for Siamese training: label 1 -> same class, label 0 -> different classes
def make_pairs(x, y, rng=None):
    """Create matching and non-matching sample pairs.

    For every sample ``x[i]`` two pairs are emitted, in this order:

    1. ``[x[i], x[j]]`` where ``y[j] == y[i]`` (``j`` may equal ``i``), label ``1``;
    2. ``[x[i], x[k]]`` where ``y[k] != y[i]``, label ``0``.

    The non-matching class is drawn uniformly from the classes that actually
    occur in ``y``, so gaps in the label range cannot trigger a lookup into
    an empty class, and a single-class input fails fast instead of looping
    forever.

    Args:
        x: Indexable collection of samples (``x[i]`` is one sample).
        y: 1-D sequence of class labels, same length as ``x``.
        rng: Optional random source exposing ``choice(seq)`` (for example a
            ``random.Random(seed)`` instance). Defaults to the global
            ``random`` module, so ``random.seed(...)`` still controls the
            output when ``rng`` is omitted.

    Returns:
        ``(pairs, labels)`` as NumPy arrays. ``pairs`` has ``2 * len(x)``
        entries of the form ``[sample_a, sample_b]``; ``labels`` alternates
        ``1, 0, 1, 0, ...`` in step with ``pairs``.

    Raises:
        ValueError: If ``y`` is not 1-D, if ``x`` and ``y`` differ in length,
            or if ``y`` contains fewer than two distinct classes.
    """
    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("y must be a 1-D sequence of class labels")
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    if rng is None:
        rng = random

    # Plain Python scalars make reliable dict keys and avoid per-item NumPy overhead.
    sample_labels = y.tolist()

    # Map each class that is actually present to the indices of its samples.
    indices_by_label = {}
    for idx, label in enumerate(sample_labels):
        indices_by_label.setdefault(label, []).append(idx)

    if len(indices_by_label) < 2:
        raise ValueError(
            "make_pairs needs at least two distinct classes to build non-matching pairs"
        )

    # For each class, the candidate classes for a non-matching partner.
    other_labels = {
        label: [other for other in indices_by_label if other != label]
        for label in indices_by_label
    }

    pairs = []
    labels = []

    for idx1, label1 in enumerate(sample_labels):
        x1 = x[idx1]

        # Matching example: another sample carrying the same label.
        idx2 = rng.choice(indices_by_label[label1])
        pairs.append([x1, x[idx2]])
        labels.append(1)  # 1 signals same labels

        # Non-matching example: a sample from a different, present class.
        label2 = rng.choice(other_labels[label1])
        idx2 = rng.choice(indices_by_label[label2])
        pairs.append([x1, x[idx2]])
        labels.append(0)  # 0 signals different labels

    return np.array(pairs), np.array(labels)

# make_pairs draws pair partners with Python's `random` module; seed it first
# so the generated pairs (and everything trained on them) are reproducible
# across Restart & Run All.
random.seed(42)

pairs_train, labels_train = make_pairs(x_train, y_train)
pairs_test, labels_test = make_pairs(x_test, y_test)

In [5]:
# Sanity check: print the similarity label of one generated training pair
# (1 = same digit, 0 = different digits).
pair_index = 21
print(labels_train[pair_index])

0


In [6]:
from keras import backend as K

# Euclidean distance between two batches of vectors
def euclidean_distance(vects):
    """Return the Euclidean distance between the two tensors in ``vects``.

    Args:
        vects: A pair ``(a, b)`` of backend tensors that can be subtracted
            from each other.

    Returns:
        The square root of the squared differences summed over axis 1, with
        that axis kept (``keepdims=True``).
    """
    left, right = vects
    squared_diff = K.square(left - right)
    squared_norm = K.sum(squared_diff, axis=1, keepdims=True)
    # Clamp to at least epsilon before the square root; presumably this keeps
    # the gradient finite when the two inputs coincide.
    clamped_norm = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped_norm)

# Shared embedding network: flatten a 28x28 image and project it through one
# 128-unit ReLU dense layer. The input tensor is deliberately NOT called
# `input`, which would shadow Python's builtin input() for the rest of the
# kernel session.
image_input = Input((28, 28))
flattened = Flatten()(image_input)
embedding = Dense(128, activation='relu')(flattened)
dense = Model(image_input, embedding)

# Two input images, one per branch of the Siamese network.
input1 = Input((28, 28))
input2 = Input((28, 28))

# Both branches go through the same `dense` model, so they share weights.
dense1 = dense(input1)
dense2 = dense(input2)

# Reduce the two embeddings to one distance value per pair, then map that
# distance to a same/different score with a single sigmoid unit.
merge_layer = Lambda(euclidean_distance)([dense1, dense2])
dense_layer = Dense(1, activation="sigmoid")(merge_layer)
model = Model(inputs=[input1, input2], outputs=dense_layer)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [7]:
wandb.init(project="siamese")

# Training hyperparameters.
BATCH_SIZE = 16
EPOCHS = 10

# Split the pair array into the two model inputs: the first and the second
# image of every pair.
first_images = pairs_train[:, 0]
second_images = pairs_train[:, 1]

# Fit the Siamese model on the image pairs; metrics are reported through the
# Weights & Biases callback.
model.fit(
    [first_images, second_images],
    labels_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[WandbCallback()],
)

wandb: Currently logged in as: alicesharma201 (use `wandb login --relogin` to force relogin)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x20e09732f70>

In [None]:
# Pick two test samples and check their similarity with the trained model.
first_index = 3
second_index = 27

# The model is fed batches, so each 28x28 image gets a leading batch axis of size 1.
new_x = np.reshape(x_test[first_index], (1, 28, 28))
new_y = np.reshape(x_test[second_index], (1, 28, 28))
print(y_test[first_index])
print(y_test[second_index])
plt.imshow(x_test[second_index])

# Sigmoid output of the model; training used label 1 for "same digit" and
# 0 for "different digits".
predict = model.predict([new_x, new_y])
print(predict)