## What Is a Siamese Network?

A Siamese network is a type of neural network architecture that involves two or more identical subnetworks joined at their outputs. These subnetworks share the same parameters and weights. Siamese networks are primarily used for tasks involving similarity comparison, such as face verification, signature verification, and one-shot learning.

Siamese networks are powerful for tasks requiring comparison and have the advantage of being efficient in learning from limited data due to their shared architecture.

In [None]:
!pip3 install tensorflow==2.8.4 tensorflow-gpu==2.8.4
!pip3 install cuda-python

Collecting tensorflow==2.8.4
  Downloading tensorflow-2.8.4-cp310-cp310-manylinux2010_x86_64.whl.metadata (2.9 kB)
Collecting tensorflow-gpu==2.8.4
  Downloading tensorflow_gpu-2.8.4-cp310-cp310-manylinux2010_x86_64.whl.metadata (2.9 kB)
Collecting keras-preprocessing>=1.1.1 (from tensorflow==2.8.4)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting protobuf<3.20,>=3.9.2 (from tensorflow==2.8.4)
  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (787 bytes)
Collecting tensorboard<2.9,>=2.8 (from tensorflow==2.8.4)
  Downloading tensorboard-2.8.0-py3-none-any.whl.metadata (1.9 kB)
Collecting tensorflow-estimator<2.9,>=2.8 (from tensorflow==2.8.4)
  Downloading tensorflow_estimator-2.8.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras<2.9,>=2.8.0rc0 (from tensorflow==2.8.4)
  Downloading keras-2.8.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting google-auth-oauthlib<0.5,>=0.4.1 (from tensorbo

In [None]:
import os
import random
import cv2 as cv
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import save_model, load_model
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Display the TensorFlow version that was actually imported.
tf.__version__

In [None]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front, which helps avoid out-of-memory errors.
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)

In [None]:
# Build the list of image-path pairs and their labels:
#   label 0 -> the two images come from two different person folders
#   label 1 -> the two images come from the same person folder
lfw_root = "/kaggle/input/labelled-faces-in-the-wild-lfw/lfw2"
folder_name = os.listdir(lfw_root)

imges_path = []
labels = []

# ---- Negative pairs (different people) ----
index = 0
while index <= 100000:

    if index % 1000 == 0:
        print(index)

    folder_1 = random.choice(folder_name)
    folder_2 = random.choice(folder_name)

    # Drawing the same folder twice would produce same-person pairs; redraw.
    if folder_1 == folder_2:
        continue

    folder_1_ = os.listdir(f"{lfw_root}/{folder_1}")
    folder_2_ = os.listdir(f"{lfw_root}/{folder_2}")

    for i in folder_1_:
        img_1_path = f"{lfw_root}/{folder_1}/{i}"
        for j in folder_2_:
            img_2_path = f"{lfw_root}/{folder_2}/{j}"

            imges_path.append([img_1_path, img_2_path])
            labels.append(0)

            index += 1

print(index)
print("----------------")

# ---- Positive pairs (same person) ----
index = 0

# Stop early if every folder has been consumed, rather than crashing in
# random.choice() on an empty list.
while index <= 100000 and folder_name:

    if index % 1000 == 0:
        print(index)

    folder_1 = random.choice(folder_name)
    folder_1_ = os.listdir(f"{lfw_root}/{folder_1}")

    for i in folder_1_:
        img_1_path = f"{lfw_root}/{folder_1}/{i}"
        for j in folder_1_:
            # Pairing an image with itself is a trivial positive sample that
            # teaches the network nothing about identity; skip it.
            if i == j:
                continue

            img_2_path = f"{lfw_root}/{folder_1}/{j}"

            imges_path.append([img_1_path, img_2_path])
            labels.append(1)

            index += 1

    # Each person folder contributes positive pairs at most once.
    folder_name.remove(folder_1)

print(index)

# Release the folder listing; it is not needed after this cell.
folder_name = 0

# Randomly Choose The data Index 

In [None]:
# Number of negative pairs to keep; 2000 more positive pairs are kept.
size = 30000

labels = np.array(labels)
zero_indices = np.where(labels == 0)[0]
one_indices = np.where(labels == 1)[0]

# Sample WITHOUT replacement so the same pair cannot be drawn twice (duplicate
# pairs would otherwise end up on both sides of the later train/test split).
# Fall back to sampling with replacement only when a class has too few pairs.
selected_zero_indices = np.random.choice(
    zero_indices, size, replace=len(zero_indices) < size
)
selected_one_indices = np.random.choice(
    one_indices, size + 2000, replace=len(one_indices) < size + 2000
)
random_numbers = np.concatenate([selected_zero_indices, selected_one_indices])

## Load images and create image pairs

In [None]:
# Read every selected pair from disk, resize both images to 70x70 and convert
# them to RGB, then collect the pairs and their labels into NumPy arrays.
images = []
label = []

for index, data in enumerate(random_numbers):
    if index % 1000 == 0:
        print(index)

    pair = []
    for path in imges_path[data]:
        raw = cv.imread(path)
        # cv.imread returns None instead of raising when a file cannot be
        # read; fail here with the offending path rather than inside resize.
        if raw is None:
            raise FileNotFoundError(f"Could not read image: {path}")

        resized = cv.resize(raw, (70, 70))
        # OpenCV loads images as BGR. Convert to RGB so the training data uses
        # the same channel order as the real-life test images (which are
        # converted with COLOR_BGR2RGB before prediction) and so that
        # matplotlib's imshow displays the correct colours.
        pair.append(cv.cvtColor(resized, cv.COLOR_BGR2RGB))

    images.append(pair)
    label.append(labels[data])

# Drop references to the large intermediate structures to free memory.
imges_path = 0
random_numbers = 0
labels = 0
selected_zero_indices = 0
selected_one_indices = 0

images = np.array(images)
label = np.array(label)

In [None]:
# Count how many loaded pairs carry each distinct label value.
unique, counts = np.unique(label, return_counts=True)
unique, counts

# Train Test Split

In [None]:
# Hold out 20% of the pairs for testing. `shuffle` must be a boolean; the
# label array belongs in `stratify`, which keeps the 0/1 label ratio the same
# in the train and test subsets.
images, images_test, label, label_test = train_test_split(
    images, label, test_size=0.2, shuffle=True, stratify=label
)

In [None]:
# Preview the first image of the first 16 training pairs in a 4x4 grid.
fig, axs = plt.subplots(4, 4, figsize=(5, 5))

for position, axis in enumerate(axs.flatten()):
    first_of_pair = images[position][0]
    axis.imshow(first_of_pair)
    axis.axis('off')

plt.show()

In [None]:
# Preview the first image of the first 16 test pairs in a 4x4 grid.
fig, axs = plt.subplots(4, 4, figsize=(5, 5))

for position, axis in enumerate(axs.flatten()):
    first_of_pair = images_test[position][0]
    axis.imshow(first_of_pair)
    axis.axis('off')

plt.show()

## Base Model

In [None]:
def create_base_model(input_shape):
    """Build the convolutional embedding network shared by both branches.

    The network is three identical stages (two 3x3 ReLU convolutions with 32
    filters followed by 2x2 max pooling), then a flatten and a 2048-unit ReLU
    dense layer whose output is the embedding.

    Args:
        input_shape: Shape of a single input image, passed to `layers.Input`.

    Returns:
        A Keras `Model` mapping an image to its 2048-dimensional embedding.
    """
    inputs = layers.Input(shape=input_shape)

    features = inputs
    for _ in range(3):
        features = layers.Conv2D(32, (3, 3), activation='relu')(features)
        features = layers.Conv2D(32, (3, 3), activation='relu')(features)
        features = layers.MaxPooling2D((2, 2))(features)

    features = layers.Flatten()(features)
    embedding = layers.Dense(2048, activation='relu')(features)

    return Model(inputs, embedding)


# Siamese Network

In [None]:
class L1Dist(layers.Layer):
    """Custom layer returning the element-wise absolute difference of two embeddings."""

    def __init__(self, **kwargs):
        # Forward the standard Keras layer arguments (name, dtype, trainable,
        # ...) to the base class. They are supplied when the layer is rebuilt
        # from a saved model's config, so dropping them would lose the saved
        # layer settings.
        super().__init__(**kwargs)

    def call(self, input_embedding, validation_embedding):
        """Return |input_embedding - validation_embedding|, element-wise."""
        return tf.math.abs(input_embedding - validation_embedding)


# Module-level layer instance used by create_siamese_model.
siamese_layer = L1Dist()

def create_siamese_model(input_shape):
    """Build the two-input Siamese model.

    Both inputs are passed through the same base model (so the weights are
    shared), the two embeddings are combined by the module-level
    `siamese_layer`, and a single sigmoid unit produces the output score.

    Args:
        input_shape: Shape of a single input image.

    Returns:
        A Keras `Model` taking `[input1, input2]` and returning one sigmoid
        output per pair.
    """
    encoder = create_base_model(input_shape)

    pair_inputs = [layers.Input(shape=input_shape) for _ in range(2)]
    # Calling the same encoder instance on both inputs shares its weights.
    embeddings = [encoder(tensor) for tensor in pair_inputs]

    distance = siamese_layer(*embeddings)
    score = layers.Dense(1, activation='sigmoid')(distance)

    return Model(pair_inputs, score)

In [None]:
# 70x70 three-channel images, matching the size the image pairs are resized to.
input_shape = (70, 70, 3)
model = create_siamese_model(input_shape)

# Single sigmoid output -> binary cross-entropy loss, tracked with accuracy.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Model Summary

In [None]:
# Print the layer-by-layer summary of the compiled Siamese model.
model.summary()

# Callback Functions to save the Best Model 

In [None]:
# Keep only the best weights seen so far. Despite the "loss" in the variable
# names, both checkpoints track accuracy (higher is better).

# Best validation accuracy.
less_val_loss = ModelCheckpoint(
    '/kaggle/working/best_val.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# Best training accuracy.
less_train_loss = ModelCheckpoint(
    '/kaggle/working/best_train.h5',
    monitor='accuracy',
    mode='max',
    save_best_only=True,
)

In [None]:
# Random augmentation applied to training images: rotation, shifts, zoom,
# horizontal flip and brightness changes; pixels exposed by a transform are
# filled from the nearest edge.
datagen = ImageDataGenerator(
    rotation_range=80,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.2, 1.5],
    fill_mode='nearest',
)

## Train The Model

In [None]:
# Train on augmented batches of 64 pairs for 40 epochs, validating on the
# held-out pairs and checkpointing the best weights.
# NOTE(review): when `flow` receives a list/tuple as `x`, Keras documents that
# only the first element is treated as images and the remaining arrays are
# passed through unmodified -- so presumably only the first image of each pair
# is augmented here. Confirm this is intended.
train_batches = datagen.flow([images[:, 0], images[:, 1]], label, batch_size=64)
validation_pairs = ([images_test[:, 0], images_test[:, 1]], label_test)

model.fit(
    train_batches,
    epochs=40,
    shuffle=True,
    validation_data=validation_pairs,
    callbacks=[less_val_loss, less_train_loss],
)

# Save The Model

In [None]:
# Save the model in its state after the final training epoch (HDF5 format).
model.save('/kaggle/working/saved_model/my_model.h5')

# Load The Model

In [None]:
def load_siamese_model(model_path):
    """Load a saved Siamese model from `model_path`.

    The custom `L1Dist` layer is registered through `custom_objects` so Keras
    can rebuild it while deserializing the model.

    Args:
        model_path: Path to the saved model file.

    Returns:
        The model returned by `load_model`.
    """
    custom_layers = {'L1Dist': L1Dist}
    return load_model(model_path, custom_objects=custom_layers)

# Reload the checkpoint written by the `less_train_loss` callback, i.e. the
# weights with the best *training* accuracy.
# NOTE(review): '/kaggle/working/best_val.h5' (best validation accuracy) may
# be the better choice for inference -- confirm which checkpoint is intended.
loaded_model = load_siamese_model('/kaggle/working/best_train.h5')

# Print the architecture of the reloaded model to check it loaded correctly.
loaded_model.summary()

## Test With Real-Life Images

In [None]:
# Prepare two real-life photos for the model: read, resize to the 70x70 input
# size, convert OpenCV's BGR order to RGB, and add a leading batch dimension.
# NOTE(review): confirm the training images use the same channel order as the
# RGB conversion applied here.
prepared = []
for photo_path in (
    '/kaggle/input/test-img/20240409_235932-EDIT.jpg',
    '/kaggle/input/test-img/IMG20240306114142-EDIT.jpg',
):
    photo = cv.imread(photo_path)
    photo = cv.resize(photo, (70, 70))
    photo = cv.cvtColor(photo, cv.COLOR_BGR2RGB)
    prepared.append(np.expand_dims(photo, axis=0))

image1, image2 = prepared

In [None]:
# Sigmoid score for the pair, taken from the single-row, single-column output.
# NOTE(review): this uses the in-memory `model`, not `loaded_model` restored
# from the checkpoint above -- confirm which one is intended.
model.predict([image1,image2])[0][0]