# Create a Siamese Network with Triplet Loss in Keras

# Task 1: Understanding the Approach

In [2]:
%matplotlib notebook

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import random

# from pca_plotter import PCAPlotter

print('TensorFlow version:', tf.__version__)

TensorFlow version: 2.3.0


## Understanding the Approach

This approach is taken from the popular [FaceNet](https://arxiv.org/abs/1503.03832) paper.

We have a CNN model called `EmbeddingModel`:

![CNN](assets/CNN.png)

We use three images for each training example:
1. `person1_image1.jpg` (Anchor Example, represented below in green)
2. `person1_image2.jpg` (Positive Example, in blue)
3. `person2_image1.jpg` (Negative Example, in red).

![Embeddings](assets/embeddings.png)


## Siamese Network

All three images of an example pass through the model, producing three embeddings: one for the Anchor Example, one for the Positive Example, and one for the Negative Example.

![Siamese Network](assets/siamese.png)

The three `EmbeddingModel` boxes shown above are not separate instances. They are the same, shared model instance, i.e. the parameters are shared and are updated for all three paths simultaneously.

# Task 2: Importing the Data

In [3]:
# Load the MNIST train/test splits through the Keras datasets helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Sanity check: show the shape of the raw training images.
print(x_train.shape)

(60000, 28, 28)


In [4]:
# Flatten every 28x28 image into a 784-element row and divide the pixel values
# by 255. The row count is inferred (-1) rather than hard-coding the split sizes.
# NOTE: x_train/x_test are rebound to the scaled arrays, so running this cell a
# second time divides by 255 again; re-run the load cell first.
x_train = np.reshape(x_train, (-1, 784))/255.
x_test = np.reshape(x_test, (-1, 784))/255.
print(x_train.shape)

(60000, 784)


# Task 3: Plotting Examples

In [5]:
def plot_triplet(triplet):
    """Draw the first three entries of ``triplet`` side by side.

    Each of ``triplet[0]``, ``triplet[1]`` and ``triplet[2]`` is reshaped to
    (28, 28) and shown in its own subplot of a 1x3 grid with the axis ticks
    removed.
    """
    plt.figure(figsize=(6, 2))
    for position in (1, 2, 3):
        plt.subplot(1, 3, position)
        image = np.reshape(triplet[position - 1], (28, 28))
        plt.imshow(image, cmap='binary')
        # Tick marks add nothing to an image grid, so hide them on both axes.
        plt.xticks([])
        plt.yticks([])
    plt.show()

In [6]:
np.reshape(x_train[0], (28, 28)).shape

(28, 28)

In [7]:
plot_triplet([x_train[0], x_train[1], x_train[2]])

<IPython.core.display.Javascript object>

# Task 4: A Batch of Triplets

In [8]:
def create_batch(batch_size, images=None, labels=None):
    """Sample ``batch_size`` random (anchor, positive, negative) triplets.

    Args:
        batch_size: Number of triplets to draw.
        images: Optional 2-D array with one flattened image per row. Defaults
            to the notebook-level ``x_train``.
        labels: Optional 1-D array of class labels aligned with ``images``.
            Defaults to the notebook-level ``y_train``.

    Returns:
        ``[anchors, positives, negatives]``: three float arrays of shape
        ``(batch_size, images.shape[1])``. Row ``i`` of ``positives`` has the
        same label as row ``i`` of ``anchors``; row ``i`` of ``negatives`` has
        a different label.

    Raises:
        ValueError: If a sampled anchor has no example with a different label.
    """
    if images is None:
        images = x_train
    if labels is None:
        labels = y_train
    images = np.asarray(images)
    labels = np.asarray(labels)

    n_samples, n_features = images.shape[0], images.shape[1]
    anchors = np.zeros((batch_size, n_features))
    positives = np.zeros((batch_size, n_features))
    negatives = np.zeros((batch_size, n_features))

    # label -> (sorted indices with that label, sorted indices with any other
    # label). Cached for the duration of this call so the label array is
    # scanned once per distinct label instead of twice per sample.
    pools = {}

    for i in range(batch_size):
        # index of the anchor image
        index = random.randint(0, n_samples - 1)
        label = labels[index]

        if label not in pools:
            # flatnonzero always yields a 1-D array, even for a single match
            # (np.squeeze(np.where(...)) would collapse that case to 0-d).
            pools[label] = (np.flatnonzero(labels == label),
                            np.flatnonzero(labels != label))
        same, other = pools[label]

        if len(other) == 0:
            raise ValueError(
                'cannot sample a negative: every example has label %r' % (label,)
            )

        if len(same) > 1:
            # Pick uniformly among same-label examples other than the anchor:
            # draw from len(same) - 1 slots and step over the anchor's slot.
            anchor_slot = int(np.searchsorted(same, index))
            slot = random.randint(0, len(same) - 2)
            if slot >= anchor_slot:
                slot += 1
            pos_index = same[slot]
        else:
            # The anchor is the only member of its class; reuse it.
            pos_index = index

        neg_index = other[random.randint(0, len(other) - 1)]

        anchors[i] = images[index]
        positives[i] = images[pos_index]
        negatives[i] = images[neg_index]
    return [anchors, positives, negatives]

In [9]:
triplet = create_batch(1)
plot_triplet(triplet)

<IPython.core.display.Javascript object>

# Task 5: Embedding Model

In [10]:
# tf.keras.backend.clear_session()
# Width of the embedding vector produced for each image.
emb_dim = 64

# Two fully connected layers: 784 inputs -> 64 ReLU units -> emb_dim sigmoid units.
embedding_model = tf.keras.Sequential()
embedding_model.add(
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,))
)
embedding_model.add(tf.keras.layers.Dense(emb_dim, activation='sigmoid'))

embedding_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
Total params: 54,400
Trainable params: 54,400
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Run one training image through the untrained model to inspect its embedding.
example = x_train[0]
# predict expects a batch, so add a leading axis and take the single result row.
example_emb = embedding_model.predict(example[np.newaxis, :])[0]
print(example_emb)

[0.6838639  0.44764948 0.39705908 0.56839055 0.49895105 0.4188617
 0.53619903 0.5309908  0.5819105  0.49595302 0.75120044 0.49583274
 0.5452691  0.47486266 0.56830746 0.5054675  0.38577035 0.38427448
 0.6307917  0.45909393 0.42049798 0.51866865 0.60952044 0.37662107
 0.45436522 0.59497976 0.38053453 0.37735337 0.49428895 0.50658506
 0.564975   0.5238209  0.5546161  0.36879337 0.555623   0.6369564
 0.44670993 0.31635705 0.5230224  0.5001467  0.6580795  0.5763236
 0.3691383  0.4936907  0.61399317 0.5216156  0.5440388  0.5155183
 0.5762595  0.54755616 0.44061652 0.5186474  0.48994863 0.43930563
 0.46458644 0.5373081  0.46191818 0.50821203 0.63102126 0.3281705
 0.4848626  0.5555706  0.49935654 0.603199  ]


# Task 6: Siamese Network

In [12]:
# One input per triplet member, created in (anchor, positive, negative) order.
in_anc, in_pos, in_neg = (
    tf.keras.layers.Input(shape=(784,)) for _ in range(3)
)

# The same embedding_model object is applied to each input, so all three
# branches use one set of weights.
em_anc, em_pos, em_neg = (
    embedding_model(branch) for branch in (in_anc, in_pos, in_neg)
)

# Join the embeddings along the feature axis: [anchor | positive | negative].
out = tf.keras.layers.Concatenate(axis=1)([em_anc, em_pos, em_neg])

net = tf.keras.Model(inputs=[in_anc, in_pos, in_neg], outputs=out)
net.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 64)           54400       input_1[0][0]                    
                                                                 input_2[0][0]         

# Task 7: Triplet Loss

Triplet Loss is a loss function that pulls the Embeddings of the Anchor and Positive Examples closer together, and pushes the Embeddings of the Anchor and Negative Examples away from each other.

For a single triplet, the distance between the Anchor and Positive embeddings is the mean squared difference over the $D$ embedding dimensions (this is what `triplet_loss` below computes with `tf.reduce_mean(tf.square(...), axis=1)`):
$
\begin{equation}
d_p = \frac{1}{D}\sum_{j=0}^{D-1}\left(f(a)_j - f(p)_j\right)^2
\end{equation}
$

Likewise, the distance between the Anchor and Negative embeddings is:
$
\begin{equation}
d_n = \frac{1}{D}\sum_{j=0}^{D-1}\left(f(a)_j - f(n)_j\right)^2
\end{equation}
$

For each example, we want:
$
\begin{equation}
d_p \leq d_n
\end{equation}
$

Therefore,
$
\begin{equation}
d_p - d_n \leq 0
\end{equation}
$

This condition is quite easily satisfied during the training.

We will make it non-trivial by adding a margin (alpha):
$
\begin{equation}
d_p - d_n + \alpha \leq 0
\end{equation}
$

Given the condition above, the Triplet Loss L is defined as:
$
\begin{equation}
L = max(d_p - d_n + \alpha, 0)
\end{equation}
$

In [13]:
def triplet_loss(alpha, emb_dim):
    """Build a triplet loss function closed over ``alpha`` and ``emb_dim``.

    The returned function has the ``loss(y_true, y_pred)`` signature. The
    margin and the embedding width are supplied through this closure; the
    alternative would be to define them as global variables and write only
    the inner function.

    Args:
        alpha: Margin added to ``dp - dn`` before clamping at zero.
        emb_dim: Number of columns each embedding occupies in ``y_pred``.

    Returns:
        A function ``loss(y_true, y_pred)`` that returns one value per row of
        ``y_pred``. ``y_true`` is not used.
    """
    def mean_squared_gap(left, right):
        # Mean of the squared element-wise difference along the embedding axis.
        return tf.reduce_mean(tf.square(left - right), axis=1)

    def loss(y_true, y_pred):
        # Columns of y_pred are laid out as [anchor | positive | negative].
        anchor = y_pred[:, :emb_dim]
        positive = y_pred[:, emb_dim:2*emb_dim]
        negative = y_pred[:, 2*emb_dim:]
        pos_gap = mean_squared_gap(anchor, positive)
        neg_gap = mean_squared_gap(anchor, negative)
        return tf.maximum(pos_gap - neg_gap + alpha, 0.)

    return loss

# Task 8: Data Generator

In [14]:
def data_generator(batch_size, emb_dim):
    """Endlessly yield ``(inputs, targets)`` training pairs.

    ``inputs`` is a fresh result of ``create_batch(batch_size)`` and
    ``targets`` is an all-zero array of shape ``(batch_size, 3 * emb_dim)``,
    rebuilt on every iteration.
    """
    target_shape = (batch_size, 3*emb_dim)
    while True:
        triplets = create_batch(batch_size)
        placeholder_targets = np.zeros(target_shape)
        yield triplets, placeholder_targets

# Task 9: Model Training

In [19]:
batch_size = 1024
epochs = 10
# Number of whole batches that fit in the training set (was hard-coded as 60000).
steps_per_epoch = len(x_train) // batch_size

# No `metrics` argument: the targets produced by data_generator are all-zero
# placeholders, so an 'accuracy' metric computed against them is meaningless.
net.compile(loss=triplet_loss(alpha=0.2, emb_dim=emb_dim), optimizer='adam')

# First 1000 test images and labels, passed to the PCAPlotter callback below.
X, Y = x_test[:1000], y_test[:1000]

In [20]:
# Train on freshly sampled triplet batches; the returned History is discarded.
_ = net.fit(
    data_generator(batch_size, emb_dim),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
# PCAPlotter comes from the project-local `pca_plotter` module, whose import is
# commented out in the first code cell. Import it here so this cell does not
# raise NameError on a fresh kernel, and fall back to plain training when the
# module is not installed.
try:
    from pca_plotter import PCAPlotter
except ImportError:
    PCAPlotter = None
    print('pca_plotter is not available; training without the PCA plot callback.')

_ = net.fit(
    data_generator(batch_size, emb_dim), 
    epochs=epochs, steps_per_epoch=steps_per_epoch,
    verbose=False,
    callbacks=(
        [PCAPlotter(plt, embedding_model, X, Y)] if PCAPlotter is not None else []
    )
)

<IPython.core.display.Javascript object>

  ...
    to  
  ['...']


In [24]:
x = tf.constant([[1.,2.,3.,4.],[10.,20.,30.,40.]])
tf.reduce_mean(x)

<tf.Tensor: shape=(), dtype=float32, numpy=13.75>

In [25]:
tf.reduce_sum(x)

<tf.Tensor: shape=(), dtype=float32, numpy=110.0>

In [26]:
tf.reduce_sum(x, axis=1)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 10., 100.], dtype=float32)>

In [1]:
# randint's `high` bound is exclusive, so randint(0, 1) can only ever return 0.
np.random.randint(0, 1, size=(10)).astype(np.float32)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [7]:
# randint's `high` bound is exclusive, so randint(0, 1) can only ever return 0.
np.random.randint(0, 1, size=(10)).astype(np.float32)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)