In [1]:
!pip install facenet-pytorch==2.5.2 opencv-python-headless

Collecting facenet-pytorch==2.5.2
  Downloading facenet_pytorch-2.5.2-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.1->torchvision->facenet-pytorch==2.5.2)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.5.1->torchvision->facenet-pytorch==2.5.2)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.5.1->torchvision->facenet-pytorch==2.5.2)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.5.1->torchvision->facenet-pytorch==2.5.2)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.5.1->torchvision->facenet-pytorch==2.5.2)
  Downloa

In [2]:
# Fetch the repository into the current working directory; DATA_DIR below
# points at the LFW_dataset folder inside this checkout.
!git clone https://github.com/rohanrao619/Face_Recognition_using_Siamese_Network.git

Cloning into 'Face_Recognition_using_Siamese_Network'...
remote: Enumerating objects: 277, done.
remote: Counting objects: 100% (277/277), done.
remote: Compressing objects: 100% (275/275), done.
remote: Total 277 (delta 21), reused 240 (delta 1), pack-reused 0 (from 0)
Receiving objects: 100% (277/277), 11.63 MiB | 14.83 MiB/s, done.
Resolving deltas: 100% (21/21), done.


In [3]:


import os
import numpy as np
from PIL import Image
import torch
from facenet_pytorch import MTCNN

# Set dataset path and desired image size (FaceNet typically uses 160x160)
DATA_DIR = '/content/Face_Recognition_using_Siamese_Network/LFW_dataset'  # Update this path to your dataset folder
IMG_SIZE = (160, 160)

# Run on the first CUDA device when one is available, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# MTCNN's image_size is a single integer (its own output crops are square), so
# pass one side of IMG_SIZE instead of the tuple. select_largest=True is spelled
# out because detect_and_crop_face treats the first returned box as the main face.
mtcnn = MTCNN(image_size=IMG_SIZE[0], margin=0, select_largest=True, device=device)

def detect_and_crop_face(image_path, required_size=IMG_SIZE):
    """
    Detect a face with MTCNN, crop it out of the image and resize it.

    The first box returned by ``mtcnn.detect`` is used (with facenet-pytorch's
    ``select_largest`` behaviour that is expected to be the largest face).
    Box coordinates are truncated to integers and clamped to the image
    bounds so the crop never contains padding from outside the picture.

    Args:
        image_path: Path of the image file to read.
        required_size: ``(width, height)`` of the returned crop.

    Returns:
        A PIL RGB image of size ``required_size`` containing the face.

    Raises:
        Exception: If the image cannot be loaded, no face is detected, or the
            detected box is empty once clamped to the image.
    """
    try:
        # The context manager closes the file handle as soon as the pixel
        # data has been decoded by convert().
        with Image.open(image_path) as source:
            img = source.convert("RGB")
    except Exception as e:
        # Chain the original error so the real cause stays in the traceback
        raise Exception(f"Error loading image: {e}") from e

    boxes, _ = mtcnn.detect(img)
    if boxes is None or len(boxes) == 0:
        raise Exception("No face detected")

    # Use the first detected face; cast bounding box coordinates to int
    x1, y1, x2, y2 = map(int, boxes[0])

    # Clamp all four edges: detections may extend past the picture, and
    # PIL would otherwise fill the out-of-bounds area with black pixels.
    width, height = img.size
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(width, x2), min(height, y2)
    if x2 <= x1 or y2 <= y1:
        raise Exception("Detected face box is empty after clamping to the image")

    return img.crop((x1, y1, x2, y2)).resize(required_size)

def load_dataset(data_dir):
    """
    Load every face found under ``data_dir`` into memory.

    ``data_dir`` is expected to contain one sub-folder per identity; the
    folder name becomes the label of every image inside it. Entries that are
    not directories are skipped. Files that fail to load or in which no face
    is found are reported with ``print`` and skipped (best effort).

    Folders and files are visited in sorted order because ``os.listdir``
    returns entries in arbitrary order; sorting keeps the sample order stable
    between runs and machines.

    Args:
        data_dir: Root directory of the dataset.

    Returns:
        ``(faces, labels)``: ``faces`` is a float32 array of the cropped
        images scaled to [0, 1], ``labels`` is an array holding the identity
        folder name of each face. Both are empty when nothing could be loaded.
    """
    faces = []
    labels = []
    for person in sorted(os.listdir(data_dir)):
        person_folder = os.path.join(data_dir, person)
        if not os.path.isdir(person_folder):
            continue
        for file in sorted(os.listdir(person_folder)):
            file_path = os.path.join(person_folder, file)
            try:
                face = detect_and_crop_face(file_path)
                # Convert image to numpy array in [0,1]
                face_np = np.array(face).astype("float32") / 255.0
            except Exception as e:
                # Deliberate best effort: one bad file must not abort the load
                print(f"Error processing {file_path}: {e}")
                continue
            faces.append(face_np)
            labels.append(person)
    return np.array(faces), np.array(labels)

# Build the in-memory dataset once and report how much was actually loaded
faces, labels = load_dataset(DATA_DIR)
print(f"Loaded {len(faces)} faces from {np.unique(labels).size} identities.")


  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)


Loaded 200 faces from 20 identities.


In [4]:


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from facenet_pytorch import InceptionResnetV1

# Define a Dataset that yields triplets
class TripletFaceDataset(Dataset):
    """
    Dataset yielding (anchor, positive, negative) image triplets.

    Item ``index`` uses ``faces[index]`` as the anchor, a random other image
    with the same label as the positive, and a random image with a different
    label as the negative. Sampling uses NumPy's global random state, so
    seed it with ``np.random.seed`` for repeatable triplets.

    Args:
        faces: Array of images indexed as (N, H, W, C).
        labels: Sequence of N labels, one per image.
    """

    def __init__(self, faces, labels):
        self.faces = faces  # numpy array of shape (N, H, W, C)
        # Coerce to an array so the element-wise comparison below also works
        # when a plain list is passed in.
        self.labels = np.asarray(labels)  # shape (N,)
        self.unique_labels = np.unique(self.labels)
        self.label_to_indices = {
            label: np.where(self.labels == label)[0]
            for label in self.unique_labels
        }

    @staticmethod
    def _to_chw_tensor(image):
        """Convert one (H, W, C) image array to a float32 (C, H, W) tensor."""
        return torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)

    def __getitem__(self, index):
        """
        Return the triplet anchored at ``index`` as three (C, H, W) tensors.

        Raises:
            ValueError: If the dataset holds a single identity, so no
                negative example can be drawn.
        """
        anchor_label = self.labels[index]

        # Positive: choose a different image with the same label. Drawing
        # from the explicit candidate set avoids retrying forever when the
        # identity has only one image; in that case the anchor doubles as
        # its own positive.
        same_identity = self.label_to_indices[anchor_label]
        candidates = same_identity[same_identity != index]
        if candidates.size:
            pos_index = np.random.choice(candidates)
        else:
            pos_index = index

        # Negative: choose an image from a different label
        other_labels = self.unique_labels[self.unique_labels != anchor_label]
        if other_labels.size == 0:
            raise ValueError(
                "Cannot sample a negative: the dataset contains a single identity"
            )
        neg_label = np.random.choice(other_labels)
        neg_index = np.random.choice(self.label_to_indices[neg_label])

        return (
            self._to_chw_tensor(self.faces[index]),
            self._to_chw_tensor(self.faces[pos_index]),
            self._to_chw_tensor(self.faces[neg_index]),
        )

    def __len__(self):
        """Number of anchors, i.e. the number of images."""
        return len(self.faces)

# Create dataset and dataloader.
# Seed both random generators first so a re-run draws the same batches and
# triplets (as far as the backend allows): torch drives the DataLoader
# shuffle, NumPy drives the random triplet sampling in TripletFaceDataset.
SEED = 42
BATCH_SIZE = 32
np.random.seed(SEED)
torch.manual_seed(SEED)

triplet_dataset = TripletFaceDataset(faces, labels)
dataloader = DataLoader(triplet_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Backbone: InceptionResnetV1 initialised from the VGGFace2 checkpoint,
# moved to the active device and put in training mode for fine-tuning.
model = InceptionResnetV1(pretrained='vggface2')
model = model.to(device)
model.train()

# Triplet margin loss on Euclidean (p=2) distances
margin = 0.5
criterion = nn.TripletMarginLoss(p=2, margin=margin)

# Adam over every parameter of the backbone
optimizer = optim.Adam(params=model.parameters(), lr=0.0005)

num_epochs = 20
for epoch in range(1, num_epochs+1):
    epoch_loss = 0.0
    for triplet in dataloader:
        # Move each part of the triplet to the device and rescale its pixel
        # values from [0, 1] to [-1, 1]
        anchor, positive, negative = (
            (images.to(device) - 0.5) * 2 for images in triplet
        )

        optimizer.zero_grad()

        # Embed anchor, positive and negative in that order, one pass each
        emb_anchor, emb_positive, emb_negative = (
            model(images) for images in (anchor, positive, negative)
        )

        loss = criterion(emb_anchor, emb_positive, emb_negative)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss as a plain Python float
        epoch_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_loss = epoch_loss / len(dataloader)
    print(f"Epoch {epoch}/{num_epochs} - Avg Triplet Loss: {avg_loss:.4f}")

# Save the fine-tuned model weights.
# The file name lives in one constant so the log line cannot drift away
# from the path that was actually written.
WEIGHTS_PATH = "resnet_face_triplet.pth"
torch.save(model.state_dict(), WEIGHTS_PATH)
print(f"Model saved as {WEIGHTS_PATH}")


  0%|          | 0.00/107M [00:00<?, ?B/s]

  state_dict = torch.load(cached_file)


Epoch 1/20 - Avg Triplet Loss: 0.0545
Epoch 2/20 - Avg Triplet Loss: 0.0449
Epoch 3/20 - Avg Triplet Loss: 0.0577
Epoch 4/20 - Avg Triplet Loss: 0.0419
Epoch 5/20 - Avg Triplet Loss: 0.0362
Epoch 6/20 - Avg Triplet Loss: 0.0396
Epoch 7/20 - Avg Triplet Loss: 0.0246
Epoch 8/20 - Avg Triplet Loss: 0.0434
Epoch 9/20 - Avg Triplet Loss: 0.0238
Epoch 10/20 - Avg Triplet Loss: 0.0406
Epoch 11/20 - Avg Triplet Loss: 0.0276
Epoch 12/20 - Avg Triplet Loss: 0.0358
Epoch 13/20 - Avg Triplet Loss: 0.0179
Epoch 14/20 - Avg Triplet Loss: 0.0453
Epoch 15/20 - Avg Triplet Loss: 0.0424
Epoch 16/20 - Avg Triplet Loss: 0.0442
Epoch 17/20 - Avg Triplet Loss: 0.0293
Epoch 18/20 - Avg Triplet Loss: 0.0237
Epoch 19/20 - Avg Triplet Loss: 0.0340
Epoch 20/20 - Avg Triplet Loss: 0.0444
Model saved as resnet_face_triplet.pth


In [5]:
# %% Cell 3: Evaluation
import random
# Switch the model to evaluation mode before the embeddings below are computed
model.eval()

def compute_distance(emb1, emb2):
    """Return the Euclidean (L2) distance between two embeddings as a Python float."""
    difference = emb1 - emb2
    return difference.norm(p=2).item()

# Spot-check a few random triplets. The sample size is capped by the dataset
# size because random.sample raises when asked for more items than exist.
indices = random.sample(range(len(triplet_dataset)), min(5, len(triplet_dataset)))
for idx in indices:
    # Stack anchor, positive and negative into one (3, C, H, W) batch so a
    # single forward pass embeds all three, then rescale pixel values from
    # [0, 1] to [-1, 1] exactly as during training.
    triplet = torch.stack(triplet_dataset[idx]).to(device)
    triplet = (triplet - 0.5) * 2

    with torch.no_grad():
        # Rows of the output follow the stacking order
        emb_anchor, emb_positive, emb_negative = model(triplet)

    pos_dist = compute_distance(emb_anchor, emb_positive)
    neg_dist = compute_distance(emb_anchor, emb_negative)
    print(f"Anchor-Positive Distance: {pos_dist:.4f} | Anchor-Negative Distance: {neg_dist:.4f}")


Anchor-Positive Distance: 0.3884 | Anchor-Negative Distance: 1.4110
Anchor-Positive Distance: 0.1939 | Anchor-Negative Distance: 1.7439
Anchor-Positive Distance: 0.1887 | Anchor-Negative Distance: 1.6422
Anchor-Positive Distance: 0.6721 | Anchor-Negative Distance: 1.3645
Anchor-Positive Distance: 0.6129 | Anchor-Negative Distance: 1.7046
