# Siamese Network for calculating Face similarity

#### "Is this the claimed person?" We will code a simple network for calculating the similarity between input faces, like the one used to unlock a phone. This is a 1:1 matching problem.

### 1. Import dependencies before anything else

In [None]:
#import dependencies

%matplotlib inline
import matplotlib.pyplot as plt
import os
import numpy as np
import random
from PIL import Image
import PIL.ImageOps    

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import torchvision.utils
import torch
from torch.autograd import Variable
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

### 2. Prepare Dataset

- We will use the following structure for files
    - Anchor dir will contain all the anchor files (Files to be stored in the database)
    - Positive dir will contain all the positive files (Files to be matched against the anchor files)
    - Negative dir will contain all the files that are a negative match against the anchor files

In [None]:
# Data paths. Each role of an image pair lives in its own directory under
# base_path; the constant names match the directory they point to.
base_path = os.path.join('drive', 'MyDrive', 'siamese_data', 'data')
ANC_PATH = os.path.join(base_path, 'anchors')    # anchor (reference) images
POS_PATH = os.path.join(base_path, 'positives')  # images that match an anchor
NEG_PATH = os.path.join(base_path, 'negative')   # images that do not match

# Nominal input shape (height, width, channels).
# NOTE(review): the visible pipeline resizes to 100x100 and converts images to
# single-channel ("L"), so the channel count here looks unused — confirm.
INPUT_SHAPE = (100, 100, 3)

<br>The code below populates three arrays (POS, ANC, NEG) with the appropriate file names from the directories

In [None]:
# Collect image paths and build the labelled pair list used for training/testing.
MAX_PAIRS = 4000  # upper bound on the number of pairs taken per class

POS = []
ANC = []
NEG = []

# Each file found in POS_PATH is paired with the file of the same name under
# ANC_PATH (the anchor file is assumed to exist; it is not checked here).
for filename in os.listdir(POS_PATH):
    if len(POS) >= MAX_PAIRS:
        break
    f = os.path.join(POS_PATH, filename)
    if os.path.isfile(f):
        POS.append(f)
        ANC.append(os.path.join(ANC_PATH, filename))

# Negatives are collected independently; sub-directories are skipped.
for filename in os.listdir(NEG_PATH):
    if len(NEG) >= MAX_PAIRS:
        break
    f = os.path.join(NEG_PATH, filename)
    if os.path.isfile(f):
        NEG.append(f)

# Use as many pairs as every list can supply so indexing never runs past the
# end, and keep the two classes balanced.
N_PAIRS = min(len(ANC), len(POS), len(NEG))

# Label 0 = matching pair (anchor, positive); label 1 = non-matching pair.
DATASET = [[anc, pos, 0] for anc, pos in zip(ANC[:N_PAIRS], POS[:N_PAIRS])]
DATASET += [[anc, neg, 1] for anc, neg in zip(ANC[:N_PAIRS], NEG[:N_PAIRS])]

# Mixing str and int makes numpy build a string array, so labels are stored
# as '0' / '1' and must be converted back to numbers by the consumer.
DATASET = np.array(DATASET)
np.random.shuffle(DATASET)

# 80 / 20 train-test split computed from the actual dataset size.
SPLIT_INDEX = round(len(DATASET) * 0.8)
TRAIN_DATA = DATASET[:SPLIT_INDEX]
TEST_DATA = DATASET[SPLIT_INDEX:]

<br>Helper function to visualise images

In [None]:
# Creating some helper functions
def imshow(img, text=None):
    """Show an image with matplotlib, optionally with a caption box.

    Args:
        img: object whose ``.numpy()`` returns an array that is reordered
            with axes ``(1, 2, 0)`` before plotting.
        text: optional caption drawn at position (75, 8); skipped when falsy.
    """
    pixels = img.numpy()
    plt.axis("off")

    if text:
        caption_box = {'facecolor':'white', 'alpha':0.8, 'pad':10}
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox=caption_box)

    # Move the first axis to the end before handing the array to imshow.
    channels_last = np.transpose(pixels, (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

def show_plot(iteration,loss):
    """Draw ``loss`` against ``iteration`` as a line plot and display it.

    Args:
        iteration: x-axis values passed to ``plt.plot``.
        loss: y-axis values passed to ``plt.plot``.
    """
    plt.plot(iteration, loss)
    plt.show()

<br> Since we have already populated the respective arrays for **POSITIVES**, **NEGATIVES**, and **ANCHORS**, we will now wrap them in a **Dataset** that applies our preprocessing function to each entry

In [None]:
class SiameseNetworkDataset(Dataset):
    """Dataset of labelled image pairs for a Siamese network.

    Each entry of ``imageFolderDataset`` is indexed as
    ``(first_image_path, second_image_path, label)``. Images are loaded as
    single-channel ("L") PIL images and passed through ``transform`` when
    one is given.
    """

    def __init__(self,imageFolderDataset,transform=None):
        """Store the pair list and the optional per-image transform."""
        self.imageFolderDataset = imageFolderDataset
        self.transform = transform

    @staticmethod
    def _load_grayscale(path):
        """Open ``path`` and return a grayscale copy, closing the file."""
        # convert() returns a new, fully loaded image, so the file handle
        # can be released as soon as the with-block exits.
        with Image.open(path) as handle:
            return handle.convert("L")

    def __getitem__(self,index):
        """Return ``(img0, img1, label)`` for the pair stored at ``index``.

        The label is returned as a float32 tensor of shape ``(1,)``.
        """
        # Honour the requested index so one pass over the DataLoader visits
        # every pair exactly once (shuffling is the DataLoader's job).
        img_tuple = self.imageFolderDataset[index]

        img0 = self._load_grayscale(img_tuple[0])
        img1 = self._load_grayscale(img_tuple[1])

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)

        # float() also accepts labels stored as strings such as '0' / '1'.
        label = float(img_tuple[2])
        return img0, img1, torch.tensor([label], dtype=torch.float32)

    def __len__(self):
        """Return the number of pairs in the dataset."""
        return len(self.imageFolderDataset)

<br>Initialise our **dataset** and create a **dataloader** for our model

In [None]:
# Preprocessing applied to every image of a pair: resize to 100x100, then
# convert the PIL image to a tensor.
transformation = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
])

# Initialize the dataset
siamese_train_dataset = SiameseNetworkDataset(TRAIN_DATA, transform=transformation)

# Create a simple dataloader. It is bound to `train_dataloader`, the name the
# training loop iterates over.
# A fairly large batch size is used to smooth the loss curve and reduce
# fluctuations. Try different values.
train_dataloader = DataLoader(siamese_train_dataset,
                              shuffle=True,
                              num_workers=8,
                              batch_size=128)

# Kept so that code still referring to the previous name keeps working.
train_dataset = train_dataloader

### Prepare our model

We will use a simple **convolutional model** to encode our input image into vectors and calculate the similarity between different encodings to verify users. The task is to learn these embeddings (vectors) from images such that similar images have similar encodings and dissimilar images have dissimilar ones. We will learn these embeddings with a Siamese network architecture as shown in the image below. $ F $ and $ G $ are the same model called twice, once to encode an **anchor** image and once to encode an **input** image *(positive/negative)*; we then calculate the absolute difference between the two outputs (L1 norm).

$$ L_1 = \vert F - G \vert  $$

<img src="images/model.png" width=400 height=100 />

The output of this siamese network is fed to a sigmoid unit to predict the probability of the images being the same.

$$ \hat{y} = \left\{ \begin{array}{rcl}
         1 & \mbox{for}
         & \sigma(|F-G|) > 0.5 \\ 
         0  & \mbox{for} & \sigma(|F-G|) \leq 0.5
                \end{array}\right. $$
                
$$ \sigma(x) \in [0,1] \quad \mbox{for} \quad x \in \mathbb{R}^n $$

In [None]:
class FaceModel(nn.Module):
    """Siamese embedding network: one shared encoder applied to both inputs.

    The encoder is three convolutions followed by three fully connected
    layers and maps a single-channel image to a 2-dimensional embedding.
    ``fc1`` expects 384 features, which is what the convolutional stack
    produces for 1x100x100 inputs (384 channels at 1x1 spatial size).
    """

    def __init__(self):
        super(FaceModel, self).__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 96, kernel_size = (11, 11), stride = (4,4))
        self.conv2 = nn.Conv2d(96, 256, kernel_size = (5, 5), stride = (1,1))
        self.conv3 = nn.Conv2d(256, 384, kernel_size = (3, 3), stride = (1,1))

        # Fully connected head producing the embedding.
        self.fc1 = nn.Linear(384, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 2)

    def model(self, x):
        """Encode a batch of images into embeddings.

        Args:
            x: image batch with one channel, laid out as (batch, 1, H, W).

        Returns:
            Tensor of shape (batch, 2).
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size = (3,3), stride = (2, 2))

        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size = (2,2), stride = (2, 2))

        x = F.relu(self.conv3(x))

        # Collapse (channels, height, width) into one feature axis so the
        # linear layers receive a (batch, features) tensor.
        x = torch.flatten(x, start_dim=1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # NOTE(review): the ReLU on the final layer restricts embeddings to
        # non-negative values; kept as in the original design — confirm intent.
        x = F.relu(self.fc3(x))

        return x

    def forward(self, anchor, inp):
        """Encode both images with the same weights.

        Args:
            anchor: batch of anchor images.
            inp: batch of images to compare against the anchors.

        Returns:
            Tuple ``(anchor_embedding, input_embedding)``.
        """
        x1 = self.model(anchor)
        x2 = self.model(inp)

        return x1, x2
        
    

<br>Initialise our loss function. In this case it is a contrastive loss based on the Euclidean distance between the two embeddings.

In [None]:
class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss on the Euclidean distance between two embeddings.

    With label 0 the squared distance is penalised (pulls the pair
    together); with label 1 the squared shortfall below ``margin`` is
    penalised (pushes the pair apart until it is at least ``margin`` away).
    """

    def __init__(self, margin = 2.0):
        """Store ``margin``, the distance beyond which label-1 pairs cost nothing."""
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss over the batch.

        Args:
            output1: embeddings of the first image of each pair.
            output2: embeddings of the second image of each pair.
            label: 0 for pairs that should be close, 1 for pairs that
                should be at least ``margin`` apart; must broadcast against
                the per-pair distance, which keeps its last dimension.

        Returns:
            Scalar tensor holding the mean loss.
        """
        euclid_dist = F.pairwise_distance(output1, output2, keepdim = True)

        pull_term = (1 - label) * torch.pow(euclid_dist, 2)
        push_term = label * torch.pow(
            torch.clamp(self.margin - euclid_dist, min=0.0), 2)

        return torch.mean(pull_term + push_term)
        

#### 3. Initialise Model, Loss and Optimiser

In [None]:
# Instantiate the Siamese model defined in this file (FaceModel) on the GPU,
# together with the contrastive loss and an Adam optimizer over its weights.
net = FaceModel().cuda()
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0005 )

#### 4. Train Model and make Predictions
We will run our training loop for 20 epochs. You can increase the number of epochs.

In [None]:
# Bookkeeping for the loss curve: x-values (iteration counts) and y-values
# (loss sampled every 10th batch).
counter = []
loss_history = []
iteration_number = 0

# Iterate through the epochs
for epoch in range(20):

    # Iterate over batches of (first image, second image, label)
    for batch_idx, (img0, img1, label) in enumerate(train_dataloader):

        # Move the batch to the GPU
        img0 = img0.cuda()
        img1 = img1.cuda()
        label = label.cuda()

        # Reset gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass through the shared network, then the loss on both outputs
        output1, output2 = net(img0, img1)
        loss_contrastive = criterion(output1, output2, label)

        # Backward pass and parameter update
        loss_contrastive.backward()
        optimizer.step()

        # Only report and record on every 10th batch
        if (batch_idx + 1) % 10 != 0:
            continue

        print(f"Epoch number {epoch}\n Current loss {loss_contrastive.item()}\n")
        iteration_number += 10

        counter.append(iteration_number)
        loss_history.append(loss_contrastive.item())

show_plot(counter, loss_history)

<br> Make Predictions on our test data we prepared earlier. We will convert Test data array into a DataLoader to feed the model for predictions

In [None]:
def makeOutput(x, margin) :
    """Turn a distance into a binary label using half the margin as threshold.

    Args:
        x: distance between two embeddings.
        margin: margin of the contrastive loss; the decision threshold is
            ``margin / 2``.

    Returns:
        0 when ``x`` is below the threshold (pair judged similar),
        otherwise 1 (pair judged dissimilar).
    """
    # The threshold was previously computed but never used: the comparison
    # was made against the full margin instead.
    limit = margin / 2

    if x < limit:
        return 0
    return 1

In [None]:
# Wrap the held-out pairs in the same dataset class and load them one pair at
# a time.
siamese_test_dataset = SiameseNetworkDataset(TEST_DATA,
                                        transform=transformation)
test_dataloader = DataLoader(siamese_test_dataset, num_workers=2, batch_size=1, shuffle=True)

dataiter = iter(test_dataloader)

# Show predictions for up to 20 pairs. zip() with range(20) stops cleanly if
# the loader yields fewer than 20 pairs instead of raising StopIteration.
for i, (x0, x1, label2) in zip(range(20), dataiter):

    # Stack the two images of the pair so they can be shown side by side
    concatenated = torch.cat((x0, x1), 0)

    # Inference only: no gradients are needed, so skip building the autograd graph
    with torch.no_grad():
        output1, output2 = net(x0.cuda(), x1.cuda())
        euclidean_distance = F.pairwise_distance(output1, output2)

    # Predicted label derived from the distance (label2 holds the true label)
    label = makeOutput(euclidean_distance.item(), 2)
    imshow(torchvision.utils.make_grid(concatenated), f'Label: {label}, Dissimilarity : {euclidean_distance.item():.2f}')