In [3]:
import os
import pandas as pd
import torch
from torchvision.io import read_image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torch.nn as nn

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class CustomImageDataset(Dataset):
    """Dataset of image pairs described by a header-less CSV annotation file.

    Each CSV row is read as ``(file_name_1, file_name_2, label)``. An image is
    loaded from ``img_dir/<file name without its last 9 characters>/<file name>``.
    NOTE(review): the 9-character suffix presumably corresponds to names such as
    ``<person>_0001.jpg`` -- confirm against the annotation files.

    Args:
        annotation_file: Path of the CSV file (parsed with ``header=None``).
        img_dir: Root directory that contains one sub-directory per image group.
        transform: Optional callable applied to each of the two images.
        target_transform: Optional callable applied to the label.
    """

    # Number of trailing characters stripped from a file name to get its folder.
    _SUFFIX_LEN = 9

    def __init__(self, annotation_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotation_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated pairs."""
        return len(self.img_labels)

    def _image_path(self, file_name):
        """Build the full path of ``file_name`` inside its sub-directory of ``img_dir``."""
        folder = file_name[:-self._SUFFIX_LEN]
        return os.path.join(self.img_dir, folder, file_name)

    def __getitem__(self, idx):
        """Return ``(image1, image2, label)`` for the pair stored in row ``idx``."""
        image1 = read_image(self._image_path(self.img_labels.iloc[idx, 0]))
        image2 = read_image(self._image_path(self.img_labels.iloc[idx, 1]))
        label = self.img_labels.iloc[idx, 2]

        if self.transform is not None:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
        if self.target_transform is not None:
            # the target transform operates on the label, never on an image
            label = self.target_transform(label)
        return image1, image2, label

# Root folder of the dataset files. The default keeps the original location;
# set the LFW_ROOT environment variable to run the notebook elsewhere.
lfw_root = os.environ.get("LFW_ROOT", "/Users/necatiisik/lfw_dataset")

path_trainSet = f"{lfw_root}/pairsDevTrain.txt"
path_testSet = f"{lfw_root}/pairsDevTest.txt"

datasetPath = f"{lfw_root}/lfw/"
trainLabelPath = f"{lfw_root}/pair_train_data.csv"
testLabelPath = f"{lfw_root}/pair_test_data.csv"


preprocess = transforms.Compose([
    transforms.ToPILImage(),
    # An explicit (height, width) forces every image to exactly 140x140.
    # A bare int only resizes the shorter edge and keeps the aspect ratio,
    # which would yield tensors of varying size for non-square inputs.
    transforms.Resize(size=(140, 140)),
    transforms.ToTensor(),
    # Normalization is currently disabled. Values kept for reference
    # (per-channel RGB mean/std noted for the tightly-cropped glint360k face dataset):
    # transforms.Normalize(
    #     mean=[0.6071, 0.4609, 0.3944],
    #     std=[0.2457, 0.2175, 0.2129]
    # )
])

train_data = CustomImageDataset(trainLabelPath, datasetPath, transform=preprocess, target_transform=None)
test_data = CustomImageDataset(testLabelPath, datasetPath, transform=preprocess, target_transform=None)

batch_size = 64

train_loader = DataLoader(dataset=train_data,
                          batch_size=batch_size,
                          shuffle=True)

# NOTE(review): shuffling is not needed for evaluation; kept as in the original
# in case later cells rely on random test batches.
test_loader = DataLoader(dataset=test_data,
                         batch_size=batch_size,
                         shuffle=True)

In [8]:
from __future__ import print_function
import argparse, random, copy
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms as T
from torch.optim.lr_scheduler import StepLR


class MLPnetwork(nn.Module):
    """
        Siamese multilayer perceptron for image-pair similarity estimation.

        Both inputs are passed through the same fully connected branch
        (shared weights), which flattens an image and maps it to a single feature.
        The two branch outputs are concatenated, passed to a linear head and
        then through a sigmoid, so the network returns one score in [0, 1] per
        pair (a shape suitable for `BCELoss`).

        Args:
            in_features: number of values in one flattened input image.
                Defaults to 140 * 140 * 3.
    """
    def __init__(self, in_features=140 * 140 * 3):
        super(MLPnetwork, self).__init__()

        # shared branch applied to each image of the pair
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

        # head applied to the concatenated branch outputs (one feature per image)
        self.fc = nn.Linear(2, 1)

        self.sigmoid = nn.Sigmoid()

        # initialize the weights of every linear layer (branch and head)
        self.apply(self.init_weights)

    def init_weights(self, m):
        """Xavier-uniform weights and a constant 0.01 bias for linear layers."""
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    def forward_once(self, x):
        """Run one image batch through the shared branch; returns (batch, features)."""
        output = self.layers(x)
        output = output.view(output.size()[0], -1)
        return output

    def forward(self, input1, input2):
        """Return the sigmoid similarity score, shape (batch, 1), for each pair."""
        # get two images' features
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)

        # concatenate both images' features
        output = torch.cat((output1, output2), 1)

        # pass the concatenation to the linear head
        output = self.fc(output)

        # pass the output of the linear head to the sigmoid layer
        output = self.sigmoid(output)

        return output

In [9]:
# Instantiate the network and move its parameters to the selected device.
model = MLPnetwork().to(device)

def count_parameters(model):
    """Return the total element count of the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total

# Count the model's trainable parameters and print the total with thousands separators.
num_param = count_parameters(model)

print(f"Number of parameters: {num_param:,}")

Number of parameters: 30,237,697
