In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from tqdm import tqdm  # Import the tqdm library
import numpy as np

In [2]:
main_folder_path = "/kaggle/input/face-verification/IIITB-FACES"

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Splitting into Train and Test

In [4]:
train_image_paths = []
train_label = []
test_label = []
test_image_paths = []
for folder in os.listdir(main_folder_path):
    count = 0
    category_path = os.path.join(main_folder_path, folder)
    for image_name in os.listdir(category_path):
        image_path = os.path.join(category_path, image_name)
        
        if count <= 1:
            test_image_paths.append(image_path)
            test_label.append(folder)
        else:
            train_image_paths.append(image_path)
            train_label.append(folder)
        
        count += 1

# Custom Dataset

In [5]:
# Define a custom dataset
class CustomDataset(Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = Image.open(img_path)
        
        if self.transform:
            img = self.transform(img)
        
        return img

# Image Preprocessing

In [6]:
# Define preprocessing transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match input size of autoencoder
    #transforms.Grayscale(num_output_channels=1),  # Convert the image to grayscale
    transforms.ToTensor(),         # Convert to tensor
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])  # Normalize
])

# Building Train DataLoader

In [7]:
# Create Train dataset and data loader
train_dataset = CustomDataset(train_image_paths, transform=transform)
train_data_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Building Test DataLoader

In [8]:
# Create Test dataset and data loader
test_dataset = CustomDataset(test_image_paths, transform=transform)
test_data_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

# Pretrained VGG16 Model

In [9]:
# Load pretrained VGG-16 model
vgg16 = models.vgg16(pretrained=True)
modules = list(vgg16.children())[:-2]
vgg16 = nn.Sequential(*modules)

# Freeze VGG-16 layers
for param in vgg16.parameters():
    param.requires_grad = False
    
# Define the size of the output vector (code_size)
code_size = 200  # You can set this to your desired size

# Add a linear layer after the VGG16 feature map (this will be trainable)
custom_head = nn.Sequential(
    nn.Flatten(),  # Flatten the 512x7x7 feature map
    nn.Linear(512 * 7 * 7, code_size),  # Linear layer to get the code of desired size
)

# Set the 'requires_grad' attribute of the custom head layers to True
for param in custom_head.parameters():
    param.requires_grad = True

# Combine the VGG16 feature extractor and the custom head


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 259MB/s]  


# Decoder Architecture

In [10]:
class Decoder(nn.Module):
    def __init__(self,code_size):
        super(Decoder, self).__init__()
        # Transposed convolution layers to upsample
        self.deconv_layers = nn.Sequential(
            nn.Linear(code_size, 512*7*7),
            nn.Unflatten(1, (512, 7, 7)),
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=4, padding=0), # Upsample to 28x28
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=4, padding=0), # Upsample to 128x128
            nn.ReLU(),
            nn.ConvTranspose2d(128, 3, kernel_size=2, stride=2),  # Upsample to 224x224
            nn.ReLU(),
        )
            
    def forward(self, x):
        x = self.deconv_layers(x)
        return x

# Autoencoder with pretrained VGG16 and Decoder

In [11]:
# Create a new model that connects the custom head to the Decoder
class autoencoder(nn.Module):
    def __init__(self,code_size):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            vgg16,
            custom_head
        )
        self.decoder = Decoder(code_size)

    def forward(self, x):
        # Pass input through the custom head and then through the Decoder
        encoded = self.encoder(x)
        output = self.decoder(encoded)
        return output

# Hyperparameters

In [12]:
# Hyperparameters
batch_size = 32
learning_rate = 0.001
num_epochs = 50
code_size = 200

full_model = autoencoder(code_size).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(full_model.parameters(), lr=learning_rate)

# Training Autoencoder Model

In [13]:
# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0  # To accumulate the loss across batches
    for data in tqdm(train_data_loader, unit='batch'):
        images = data.to(device)  # Move images to GPU
#         print(images.shape)
        optimizer.zero_grad()
        
        reconstructed = full_model(images)
        loss = criterion(reconstructed, images)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    average_loss = total_loss / len(train_data_loader)  # Calculate average loss
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')

100%|██████████| 23/23 [00:18<00:00,  1.27batch/s]


Epoch [1/50], Average Loss: 0.3102


100%|██████████| 23/23 [00:08<00:00,  2.61batch/s]


Epoch [2/50], Average Loss: 0.2535


100%|██████████| 23/23 [00:08<00:00,  2.75batch/s]


Epoch [3/50], Average Loss: 0.2362


100%|██████████| 23/23 [00:08<00:00,  2.72batch/s]


Epoch [4/50], Average Loss: 0.2287


100%|██████████| 23/23 [00:08<00:00,  2.72batch/s]


Epoch [5/50], Average Loss: 0.2243


100%|██████████| 23/23 [00:08<00:00,  2.59batch/s]


Epoch [6/50], Average Loss: 0.2222


100%|██████████| 23/23 [00:08<00:00,  2.74batch/s]


Epoch [7/50], Average Loss: 0.2212


100%|██████████| 23/23 [00:08<00:00,  2.73batch/s]


Epoch [8/50], Average Loss: 0.2197


100%|██████████| 23/23 [00:08<00:00,  2.72batch/s]


Epoch [9/50], Average Loss: 0.2180


100%|██████████| 23/23 [00:08<00:00,  2.62batch/s]


Epoch [10/50], Average Loss: 0.2155


100%|██████████| 23/23 [00:08<00:00,  2.66batch/s]


Epoch [11/50], Average Loss: 0.2143


100%|██████████| 23/23 [00:08<00:00,  2.62batch/s]


Epoch [12/50], Average Loss: 0.2140


100%|██████████| 23/23 [00:08<00:00,  2.60batch/s]


Epoch [13/50], Average Loss: 0.2132


100%|██████████| 23/23 [00:08<00:00,  2.71batch/s]


Epoch [14/50], Average Loss: 0.2126


100%|██████████| 23/23 [00:08<00:00,  2.71batch/s]


Epoch [15/50], Average Loss: 0.2121


100%|██████████| 23/23 [00:08<00:00,  2.68batch/s]


Epoch [16/50], Average Loss: 0.2114


100%|██████████| 23/23 [00:08<00:00,  2.57batch/s]


Epoch [17/50], Average Loss: 0.2102


100%|██████████| 23/23 [00:08<00:00,  2.66batch/s]


Epoch [18/50], Average Loss: 0.2093


100%|██████████| 23/23 [00:08<00:00,  2.67batch/s]


Epoch [19/50], Average Loss: 0.2089


100%|██████████| 23/23 [00:08<00:00,  2.65batch/s]


Epoch [20/50], Average Loss: 0.2087


100%|██████████| 23/23 [00:08<00:00,  2.61batch/s]


Epoch [21/50], Average Loss: 0.2087


100%|██████████| 23/23 [00:08<00:00,  2.69batch/s]


Epoch [22/50], Average Loss: 0.2085


100%|██████████| 23/23 [00:08<00:00,  2.67batch/s]


Epoch [23/50], Average Loss: 0.2082


100%|██████████| 23/23 [00:08<00:00,  2.57batch/s]


Epoch [24/50], Average Loss: 0.2079


100%|██████████| 23/23 [00:08<00:00,  2.64batch/s]


Epoch [25/50], Average Loss: 0.2076


100%|██████████| 23/23 [00:08<00:00,  2.67batch/s]


Epoch [26/50], Average Loss: 0.2074


100%|██████████| 23/23 [00:08<00:00,  2.67batch/s]


Epoch [27/50], Average Loss: 0.2071


100%|██████████| 23/23 [00:08<00:00,  2.57batch/s]


Epoch [28/50], Average Loss: 0.2068


100%|██████████| 23/23 [00:08<00:00,  2.66batch/s]


Epoch [29/50], Average Loss: 0.2065


100%|██████████| 23/23 [00:08<00:00,  2.66batch/s]


Epoch [30/50], Average Loss: 0.2063


100%|██████████| 23/23 [00:08<00:00,  2.64batch/s]


Epoch [31/50], Average Loss: 0.2062


100%|██████████| 23/23 [00:09<00:00,  2.55batch/s]


Epoch [32/50], Average Loss: 0.2060


100%|██████████| 23/23 [00:08<00:00,  2.64batch/s]


Epoch [33/50], Average Loss: 0.2060


100%|██████████| 23/23 [00:08<00:00,  2.68batch/s]


Epoch [34/50], Average Loss: 0.2059


100%|██████████| 23/23 [00:09<00:00,  2.55batch/s]


Epoch [35/50], Average Loss: 0.2059


100%|██████████| 23/23 [00:08<00:00,  2.66batch/s]


Epoch [36/50], Average Loss: 0.2058


100%|██████████| 23/23 [00:08<00:00,  2.65batch/s]


Epoch [37/50], Average Loss: 0.2056


100%|██████████| 23/23 [00:08<00:00,  2.65batch/s]


Epoch [38/50], Average Loss: 0.2055


100%|██████████| 23/23 [00:09<00:00,  2.51batch/s]


Epoch [39/50], Average Loss: 0.2054


100%|██████████| 23/23 [00:08<00:00,  2.59batch/s]


Epoch [40/50], Average Loss: 0.2053


100%|██████████| 23/23 [00:08<00:00,  2.60batch/s]


Epoch [41/50], Average Loss: 0.2053


100%|██████████| 23/23 [00:09<00:00,  2.51batch/s]


Epoch [42/50], Average Loss: 0.2053


100%|██████████| 23/23 [00:08<00:00,  2.61batch/s]


Epoch [43/50], Average Loss: 0.2052


100%|██████████| 23/23 [00:08<00:00,  2.58batch/s]


Epoch [44/50], Average Loss: 0.2051


100%|██████████| 23/23 [00:08<00:00,  2.61batch/s]


Epoch [45/50], Average Loss: 0.2051


100%|██████████| 23/23 [00:09<00:00,  2.50batch/s]


Epoch [46/50], Average Loss: 0.2051


100%|██████████| 23/23 [00:08<00:00,  2.58batch/s]


Epoch [47/50], Average Loss: 0.2050


100%|██████████| 23/23 [00:08<00:00,  2.59batch/s]


Epoch [48/50], Average Loss: 0.2049


100%|██████████| 23/23 [00:09<00:00,  2.51batch/s]


Epoch [49/50], Average Loss: 0.2048


100%|██████████| 23/23 [00:08<00:00,  2.59batch/s]

Epoch [50/50], Average Loss: 0.2047





# Encoding the Test faces

In [14]:
# Encoding the Test faces
test_encoded_faces = []
with torch.no_grad():
    for data in test_data_loader:
        images = data.to(device)
        encoded = full_model.encoder(images)
        test_encoded_faces.append(encoded)

test_encoded_faces = torch.cat(test_encoded_faces, dim=0)

In [15]:
test_encoded_faces.shape

torch.Size([98, 200])

# Encoding the Train faces

In [16]:
# Encoding the Train faces
train_encoded_faces = []
with torch.no_grad():
    for data in train_data_loader:
        images = data.to(device)
        encoded = full_model.encoder(images)
        train_encoded_faces.append(encoded)

train_encoded_faces = torch.cat(train_encoded_faces, dim=0)

In [17]:
train_encoded_faces.shape

torch.Size([734, 200])

In [18]:
test_faces = test_encoded_faces.cpu().numpy()
train_faces = train_encoded_faces.cpu().numpy()

# Threshold Calculation

In [19]:
# Calculate the pairwise L2 norm distances between all vectors
pairwise_distances = np.linalg.norm(train_faces[:, np.newaxis, :] - train_faces[np.newaxis, :, :], axis=-1)

# Set diagonal elements to 0 to avoid calculating distances to itself
np.fill_diagonal(pairwise_distances, 0)

# Find the maximum L2 norm distance
max_distance = np.max(pairwise_distances)

print("Maximum L2 norm distance:", max_distance)

Maximum L2 norm distance: 215.65994


In [20]:
threshold = 0.5*max_distance
print("Threshold:- ",threshold)

Threshold:-  107.82997131347656


# Prediction for Test Images

In [21]:
def find_index(test_image,threshold):
    # Calculate the L2 norm distances between the test vector and all vectors in the matrix
    distances = np.linalg.norm(train_faces - test_image, axis=1)
    
    # Find indices of distances below the threshold
    indices_below_threshold = np.where(distances < threshold)[0]

    # Find the index of the minimum distance among those below the threshold
    index_of_min_distance = indices_below_threshold[np.argmin(distances[indices_below_threshold])]
    
    return index_of_min_distance

In [22]:
predicted = []
actual = []
for i in range(test_faces.shape[0]):
    ind = find_index(test_faces[i],threshold)
    predicted.append(train_label[ind])
    actual.append(test_label[i])

In [23]:
array1 = np.array(predicted,dtype=object)
array2 = np.array(actual,dtype=object)

# Compare the arrays element-wise and create a new array of 1s and 0s
comparison_result = (array1 == array2).astype(int)

In [24]:
comparison_result

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Calculating the Accuracy

In [25]:
cnt = 0
for i in comparison_result:
    if i == 1:
        cnt += 1
print("Accuracy:- ",cnt/len(comparison_result))

Accuracy:-  0.02040816326530612
