In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import tensorflow as tf
from keras.layers import Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Flatten
from keras import Model
import keras
import warnings
import pandas as pd

warnings.filterwarnings("ignore")

In [7]:

def build_embedding_generator(k_layers_to_tune=10):

    base_model = tf.keras.applications.ResNet50V2(weights="imagenet", 
                                                      input_shape=(100, 100, 3),
                                                      include_top = False)

    for l in base_model.layers[:-k_layers_to_tune]:
        l.trainable = False
    
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    x = Dense(256, activation="relu")(x)
    x = BatchNormalization()(x)
    x = Dense(128, activation="sigmoid")(x)
    #x = tf.nn.l2_normalize(x, axis=1)
    
    embedding_model = Model(base_model.input, x, name="Embedding")

    return embedding_model


In [8]:
embedding_model = build_embedding_generator(15)

In [9]:
embedding_model.summary()

Model: "Embedding"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 100, 100, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 106, 106, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 50, 50, 64)           9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 52, 52, 64)           0         ['conv1_conv[0][0]']          
                                                                                          

 tchNormalization)                                                                                
                                                                                                  
 conv2_block3_preact_relu (  (None, 25, 25, 256)          0         ['conv2_block3_preact_bn[0][0]
 Activation)                                                        ']                            
                                                                                                  
 conv2_block3_1_conv (Conv2  (None, 25, 25, 64)           16384     ['conv2_block3_preact_relu[0][
 D)                                                                 0]']                          
                                                                                                  
 conv2_block3_1_bn (BatchNo  (None, 25, 25, 64)           256       ['conv2_block3_1_conv[0][0]'] 
 rmalization)                                                                                     
          

 conv3_block2_1_relu (Activ  (None, 13, 13, 128)          0         ['conv3_block2_1_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv3_block2_2_pad (ZeroPa  (None, 15, 15, 128)          0         ['conv3_block2_1_relu[0][0]'] 
 dding2D)                                                                                         
                                                                                                  
 conv3_block2_2_conv (Conv2  (None, 13, 13, 128)          147456    ['conv3_block2_2_pad[0][0]']  
 D)                                                                                               
                                                                                                  
 conv3_block2_2_bn (BatchNo  (None, 13, 13, 128)          512       ['conv3_block2_2_conv[0][0]'] 
 rmalizati

                                                                                                  
 conv3_block4_3_conv (Conv2  (None, 7, 7, 512)            66048     ['conv3_block4_2_relu[0][0]'] 
 D)                                                                                               
                                                                                                  
 conv3_block4_out (Add)      (None, 7, 7, 512)            0         ['max_pooling2d_4[0][0]',     
                                                                     'conv3_block4_3_conv[0][0]'] 
                                                                                                  
 conv4_block1_preact_bn (Ba  (None, 7, 7, 512)            2048      ['conv3_block4_out[0][0]']    
 tchNormalization)                                                                                
                                                                                                  
 conv4_blo

 D)                                                                 0]']                          
                                                                                                  
 conv4_block3_1_bn (BatchNo  (None, 7, 7, 256)            1024      ['conv4_block3_1_conv[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv4_block3_1_relu (Activ  (None, 7, 7, 256)            0         ['conv4_block3_1_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv4_block3_2_pad (ZeroPa  (None, 9, 9, 256)            0         ['conv4_block3_1_relu[0][0]'] 
 dding2D)                                                                                         
          

 conv4_block5_2_relu (Activ  (None, 7, 7, 256)            0         ['conv4_block5_2_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv4_block5_3_conv (Conv2  (None, 7, 7, 1024)           263168    ['conv4_block5_2_relu[0][0]'] 
 D)                                                                                               
                                                                                                  
 conv4_block5_out (Add)      (None, 7, 7, 1024)           0         ['conv4_block4_out[0][0]',    
                                                                     'conv4_block5_3_conv[0][0]'] 
                                                                                                  
 conv4_block6_preact_bn (Ba  (None, 7, 7, 1024)           4096      ['conv4_block5_out[0][0]']    
 tchNormal

                                                                                                  
 conv5_block2_preact_relu (  (None, 4, 4, 2048)           0         ['conv5_block2_preact_bn[0][0]
 Activation)                                                        ']                            
                                                                                                  
 conv5_block2_1_conv (Conv2  (None, 4, 4, 512)            1048576   ['conv5_block2_preact_relu[0][
 D)                                                                 0]']                          
                                                                                                  
 conv5_block2_1_bn (BatchNo  (None, 4, 4, 512)            2048      ['conv5_block2_1_conv[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv5_blo

 dense_4 (Dense)             (None, 256)                  131328    ['dropout_1[0][0]']           
                                                                                                  
 batch_normalization_3 (Bat  (None, 256)                  1024      ['dense_4[0][0]']             
 chNormalization)                                                                                 
                                                                                                  
 dense_5 (Dense)             (None, 128)                  32896     ['batch_normalization_3[0][0]'
                                                                    ]                             
                                                                                                  
Total params: 24781184 (94.53 MB)
Trainable params: 6734208 (25.69 MB)
Non-trainable params: 18046976 (68.84 MB)
_______________________________________________________________________________________________

### Custom layers & Model

In [10]:
class DistanceLayer(tf.keras.layers.Layer):
    """
    This layer is responsible for computing the distance between the anchor
    embedding and the positive embedding, and the anchor embedding and the
    negative embedding.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, positive, negative):

        
        anchor_pos_distance = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
        anchor_neg_distance = tf.reduce_sum(tf.square(anchor - negative), axis=-1)

        return (anchor_pos_distance, anchor_neg_distance)


def build_siamesenetwork(embedding_model):

    anchor_input = keras.layers.Input(name="anchor", shape=(100, 100, 3))
    pos_input = keras.layers.Input(name="positive", shape=(100, 100, 3))
    neg_input = keras.layers.Input(name="negative", shape=(100, 100, 3))

    distances = DistanceLayer()(
        embedding_model(anchor_input),
        embedding_model(pos_input),
        embedding_model(neg_input)
    )

    siamese_network = Model(
            inputs=[anchor_input, pos_input, neg_input],
            outputs=distances
    )

    return siamese_network



In [11]:
class SiameseModel(Model):
    """The Siamese Network model with a custom training and testing loops.

    Computes the triplet loss using the three embeddings produced by the
    Siamese Network.

    The triplet loss is defined as:
       L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)
    """
    def __init__(self, siamese_network, margin=0.5):
        super().__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        self.loss_tracker = keras.metrics.Mean(name="loss")

    def call(self, inputs):
        return self.siamese_network(inputs)

    

    def train_step(self, data):

        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        gradients = tape.gradient(loss, self.siamese_network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.siamese_network.trainable_weights))
        
        self.loss_tracker.update_state(loss)

        return {"loss" : self.loss_tracker.result()}

    def _compute_loss(self, data):

        ap_distance, an_distance = self.siamese_network(data)

        loss = ap_distance - an_distance
        #loss = an_distance - ap_distance
        loss = tf.maximum(loss + self.margin, 0.0)
        return loss

    def test_step(self, data):
        loss = self._compute_loss(data)
        self.loss_tracker.update_state(loss)

        return {"loss" : self.loss_tracker.result()}

    @property
    def metrics(self):

        return [self.loss_tracker]


In [12]:
siam_net = build_siamesenetwork(embedding_model)

In [13]:
siam_model = SiameseModel(siam_net, margin=0.7)

## UTILS


In [14]:
import pandas as pd
import numpy as np

In [15]:
PATH_TO_IMGS = "../images/"

triplets_df = pd.read_csv("../triplets.csv")




def parse_csv_line(line):
    columns = ['anchor', 'id1', 'pos', 'id2', 'neg', 'id3']
    
    # Decode the CSV line
    record_defaults = [''] * 6  # All fields are strings
    parsed_line = tf.io.decode_csv(line, record_defaults)
    parsed_line = dict(zip(columns, parsed_line))
    return parsed_line


def load_and_preprocess_image(path):
    
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [100, 100])
    image = image / 255.0
    return image



def create_triplet_dataset(csv_file_path, batch_size=32):
    dataset = tf.data.TextLineDataset(csv_file_path)
    # Skip the header line
    dataset = dataset.skip(1)
    
    # Parse each line
    dataset = dataset.map(lambda line: parse_csv_line(line))
    # Load and preprocess the images
    def load_images(parsed_line):

        base_path = tf.constant(PATH_TO_IMGS)
        
        anchor_path = tf.strings.join([base_path, parsed_line['anchor']], separator='')
        pos_path = tf.strings.join([base_path, parsed_line['pos']], separator='')
        neg_path = tf.strings.join([base_path, parsed_line['neg']], separator='')

        
        anchor = load_and_preprocess_image(anchor_path)
        pos = load_and_preprocess_image(pos_path)
        neg = load_and_preprocess_image(neg_path)
        return anchor, pos, neg

    
    dataset = dataset.map(load_images)
    dataset = dataset.batch(batch_size)
    return dataset


In [16]:
dataset = create_triplet_dataset("../triplets.csv", batch_size=24)

In [17]:
batch = next(iter(dataset.take(1)))

batch

(<tf.Tensor: shape=(24, 100, 100, 3), dtype=float32, numpy=
 array([[[[1.42117634e-01, 6.76078424e-02, 8.15686025e-03],
          [1.96458831e-01, 1.24169417e-01, 5.08117639e-02],
          [2.64396071e-01, 1.84388235e-01, 1.03474505e-01],
          ...,
          [1.83654949e-01, 8.66941139e-02, 2.95372289e-02],
          [1.83762282e-01, 9.45662037e-02, 3.96642387e-02],
          [2.00941190e-01, 1.14666663e-01, 5.97647056e-02]],
 
         [[2.39372551e-01, 1.62980393e-01, 9.44313705e-02],
          [2.80975699e-01, 2.00142741e-01, 1.25750601e-01],
          [3.23627442e-01, 2.39019603e-01, 1.54149011e-01],
          ...,
          [1.83219671e-01, 8.62588286e-02, 2.83019617e-02],
          [1.87052429e-01, 9.80963334e-02, 4.10720222e-02],
          [2.21176475e-01, 1.35843128e-01, 7.81176463e-02]],
 
         [[2.43137255e-01, 1.59215674e-01, 8.15686211e-02],
          [2.90466666e-01, 2.06545100e-01, 1.23054892e-01],
          [3.66196066e-01, 2.79117674e-01, 1.86862737e-01],
    

In [11]:
checkpoint_filepath = 'checkpoints/face_effnet_weights_{epoch:02d}.h5'


model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,  # Set to False to save the entire model     # Set to False to save the model after every epoch regardless of performance
    verbose=1                 # Logs a message whenever a model is save9d
)

In [19]:
siam_model.compile(optimizer=keras.optimizers.Adam(0.001), 
                   )

siam_model.fit(batch, 
               epochs=20,
               #callbacks=[model_checkpoint_callback]
               )

#embedding_model.save("embeddings_face_trained.h5")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


KeyboardInterrupt: 

In [72]:
embedding_model.save("embeddings_face_4ep.h5")



In [20]:
anch_embds = embedding_model.predict(batch[0])
pos_embds = embedding_model.predict(batch[1])
neg_embds = embedding_model.predict(batch[2])



In [53]:
cossim = keras.metrics.CosineSimilarity()

i = 22

pos_sim = cossim(anch_embds[i], pos_embds[i])
neg_sim = cossim(anch_embds[i], neg_embds[i])

print(pos_sim)
print(neg_sim)

tf.Tensor(0.9985318, shape=(), dtype=float32)
tf.Tensor(0.9810132, shape=(), dtype=float32)


In [32]:
pos_sim

<tf.Tensor: shape=(), dtype=float32, numpy=0.9879217>

In [33]:
neg_sim

<tf.Tensor: shape=(), dtype=float32, numpy=0.97815406>

In [29]:
d_p = tf.reduce_mean(tf.square(anch_embds - pos_embds), axis=-1)

d_n = tf.reduce_mean(tf.square(anch_embds - neg_emds), axis=-1)

In [35]:
np.linalg.norm(anch_embds[2] - pos_embds[2])

0.93990636

In [34]:
np.linalg.norm(anch_embds[2] - neg_embds[2])

0.96476966

In [80]:
d_n[2]

<tf.Tensor: shape=(), dtype=float32, numpy=0.0011390793>

In [86]:
pos_sim

<tf.Tensor: shape=(), dtype=float32, numpy=0.94077945>

In [87]:
neg_sim

<tf.Tensor: shape=(), dtype=float32, numpy=0.95942974>

In [42]:
tf.reduce_mean(d_p)

<tf.Tensor: shape=(), dtype=float32, numpy=0.003309095>

In [43]:
np.dot(.pos_embds[0])

array([ -4.220223  ,  -4.094585  ,   1.8616943 ,  -4.730528  ,
         1.7199237 ,  -2.9923196 ,  -0.71765304,   3.2444282 ,
        -1.4946157 ,   2.9306054 ,  -5.7649603 ,  -4.2568016 ,
        -2.8130176 ,  -5.20363   ,   5.2181096 ,  -4.2942576 ,
        -3.0595117 ,  -6.4208937 ,   8.628557  ,  -0.35944593,
         5.250349  ,  -0.55359447,  -1.166327  ,   2.8359075 ,
         3.9243643 ,  -7.1290317 ,  -0.22900014,  -0.8546556 ,
         7.6698065 ,   1.4251083 ,   7.000401  , -10.723488  ,
        -3.4239972 ,  -1.5374297 ,   3.4793866 ,   2.0072205 ,
         2.8157723 ,   3.6379566 ,  -7.204947  ,  -0.2722483 ,
         0.492214  ,   1.0479809 ,   1.3769054 ,   3.7842267 ,
        -5.194569  ,  -5.696434  ,  -5.8673477 ,  -0.28007028,
        -5.3335047 ,   4.3239017 ,  -3.2170706 ,  -2.607987  ,
         1.2778978 ,   6.908159  ,  -2.8113675 ,   4.5870624 ,
        -0.03847475,   1.0731807 ,   3.2307303 ,  -7.840382  ,
        -7.1091623 ,   5.9417615 ,   8.099527  ,  -1.96