In [2]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collect

In [4]:
# Clone the project repository and move its top-level contents into /kaggle/working.
# NOTE(review): presumably this is what makes `siamese_resnet` importable from the
# notebook — confirm against the repository layout.
# With default shell globbing `*` does not match dotfiles, so hidden entries such as
# `.git` stay behind in the cloned folder.
!git clone https://github.com/sathishkumar67/Face-Recognition-using-Resnet.git
!mv /kaggle/working/Face-Recognition-using-Resnet/* /kaggle/working/

Cloning into 'Face-Recognition-using-Resnet'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 18 (delta 3), reused 12 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (18/18), 70.80 KiB | 17.70 MiB/s, done.
Resolving deltas: 100% (3/3), done.


In [30]:
import os
import random
from PIL import Image
from collections import defaultdict
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset
from huggingface_hub import hf_hub_download
from siamese_resnet import unzip_file

In [13]:
# Directory that downloads are written to: wherever the notebook is running from.
LOCAL_DIR = os.getcwd()

# Hugging Face Hub coordinates of the zipped face-recognition dataset.
DATASET_REPO_TYPE = "dataset"
DATASET_REPO_ID = "pt-sk/Face_Recognition_Dataset"
DATASET_FILENAME_IN_REPO = "Face Recognition Dataset.zip"


In [14]:
# Download and extract the dataset, skipping both steps when the extracted folder is
# already present. The archive is removed after extraction, so without this guard every
# re-run of the cell would have to fetch the multi-gigabyte archive again.
# NOTE(review): assumes the archive extracts to a folder named after the zip file
# (minus ".zip") and that an existing folder means a complete extraction — confirm.
dataset_dir = os.path.join(LOCAL_DIR, os.path.splitext(DATASET_FILENAME_IN_REPO)[0])

if not os.path.isdir(dataset_dir):
    # Download the dataset from Hugging Face Hub
    hf_hub_download(repo_id=DATASET_REPO_ID, filename=DATASET_FILENAME_IN_REPO, repo_type=DATASET_REPO_TYPE, local_dir=LOCAL_DIR)

    # Unzip the dataset
    unzip_file(os.path.join(LOCAL_DIR, DATASET_FILENAME_IN_REPO), LOCAL_DIR)

Unzipping: 100%|██████████| 5.20G/5.20G [00:36<00:00, 142MB/s] 


Unzipped /kaggle/working/Face Recognition Dataset.zip to /kaggle/working
Removed zip file: /kaggle/working/Face Recognition Dataset.zip


In [19]:
class TripletLoss(nn.Module):
    """Margin-based triplet loss over a batch of embeddings.

    For every (anchor, positive, negative) row the loss is
    ``max(d(anchor, positive) - d(anchor, negative) + margin, 0)``, where ``d`` is
    ``F.pairwise_distance`` with its default settings. The per-row values are averaged.

    Args:
        margin: Amount by which the negative must be farther from the anchor than the
            positive before a row stops contributing to the loss.
    """

    def __init__(self, margin=0.5):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss of the batch as a zero-dimensional tensor."""
        # Distance of each anchor to its positive and to its negative
        pos_dist = F.pairwise_distance(anchor, positive)
        neg_dist = F.pairwise_distance(anchor, negative)

        # Hinge: only rows that violate the margin contribute
        margin_violation = pos_dist - neg_dist + self.margin
        return F.relu(margin_violation).mean()

In [27]:
class SiameseResNet(nn.Module):
    """ResNet-18 backbone that maps an image batch to embedding vectors.

    The backbone is loaded with the ``IMAGENET1K_V1`` weights and its final fully
    connected layer is replaced by a new linear layer with ``embedding_dim`` outputs.

    Args:
        embedding_dim: Size of the embedding produced for each input image.
    """

    def __init__(self, embedding_dim=256):
        super().__init__()
        # Load pretrained ResNet18
        self.backbone = torchvision.models.resnet18(weights="IMAGENET1K_V1", progress=True)

        # Replace the final fully connected layer. The input width is read from the
        # existing head instead of being hard-coded, so it always matches the backbone.
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, embedding_dim)

    def forward(self, x):
        """Return the backbone's output for ``x`` (one embedding per input row)."""
        return self.backbone(x)

    def print_parameters_count(self):
        """Print the total number of parameters of the module, in millions."""
        total_params = sum(p.numel() for p in self.parameters()) / 1e6  # Convert to millions
        print(f"Total parameters: {total_params:.2f}m")

In [28]:
# Build the embedding network with 256-dimensional outputs and report its parameter count.
model = SiameseResNet(embedding_dim=256)
model.print_parameters_count()

Total parameters: 11.31m


In [29]:
# Pass a sample image through the model to sanity-check the output shape.
# The check runs in eval mode under no_grad so that it neither updates the pretrained
# BatchNorm running statistics with random noise nor builds an unused autograd graph.
dummy_input = torch.randn(1, 3, 224, 224)  # batch size of 1, 3 channels, 224x224 image
was_training = model.training
model.eval()
with torch.no_grad():
    output = model(dummy_input)
model.train(was_training)  # restore whichever mode the model was in before the check
output.shape  # should be (1, 256) since we changed the final layer to output 256 features

torch.Size([1, 256])

In [31]:
class TripletFaceDataset(Dataset):
    """Dataset that yields random (anchor, positive, negative) image triplets.

    ``root_dir`` is expected to contain one sub-directory per identity, each holding
    that identity's images. Identities with fewer than two images are skipped because
    an anchor and a distinct positive cannot be drawn from them.

    Args:
        root_dir: Directory containing one folder per identity.
        transform: Optional callable applied to each loaded RGB PIL image.
        samples_per_identity: Number of triplets per identity that make up one pass
            over the dataset; ``len(dataset)`` is
            ``number_of_identities * samples_per_identity``.
    """

    # File extensions (compared case-insensitively) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None, samples_per_identity=10):
        self.root_dir = root_dir
        self.transform = transform
        self.samples_per_identity = samples_per_identity
        self.id_to_images = defaultdict(list)

        # Populate identities and their images.
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> identity mapping stable between runs and machines.
        for identity in sorted(os.listdir(root_dir)):
            identity_dir = os.path.join(root_dir, identity)
            if not os.path.isdir(identity_dir):
                continue
            images = [
                os.path.join(identity_dir, name)
                for name in sorted(os.listdir(identity_dir))
                if name.lower().endswith(self.IMAGE_EXTENSIONS)
            ]
            if len(images) >= 2:  # Ensure at least 2 images per identity
                self.id_to_images[identity] = images
        self.identities = list(self.id_to_images.keys())

    def __len__(self):
        """Return the number of triplets in one pass over the dataset."""
        return len(self.identities) * self.samples_per_identity

    @staticmethod
    def _load_rgb(path):
        """Open the image at ``path`` and return it converted to RGB."""
        # The context manager releases the file handle even if decoding fails.
        with Image.open(path) as img:
            return img.convert('RGB')

    def __getitem__(self, idx):
        """Return an (anchor, positive, negative) triplet for index ``idx``.

        The anchor identity is ``identities[idx % len(identities)]``; the image choices
        and the negative identity are drawn at random on every call.

        Raises:
            ValueError: If fewer than two usable identities were found, in which case
                no negative from a different identity exists.
        """
        num_identities = len(self.identities)
        if num_identities < 2:
            raise ValueError(
                "TripletFaceDataset needs at least two identities with two or more "
                f"images each to form a triplet; found {num_identities} in "
                f"{self.root_dir!r}"
            )

        # Anchor and positive from the same identity
        anchor_idx = idx % num_identities
        anchor_id = self.identities[anchor_idx]
        anchor_img_path, positive_img_path = random.sample(self.id_to_images[anchor_id], 2)

        # Negative from a different identity: draw uniformly from the other
        # num_identities - 1 slots and step over the anchor's slot, so no retry loop
        # is needed.
        negative_idx = random.randrange(num_identities - 1)
        if negative_idx >= anchor_idx:
            negative_idx += 1
        negative_id = self.identities[negative_idx]
        negative_img_path = random.choice(self.id_to_images[negative_id])

        # Load and transform images
        anchor = self._load_rgb(anchor_img_path)
        positive = self._load_rgb(positive_img_path)
        negative = self._load_rgb(negative_img_path)

        if self.transform:
            anchor = self.transform(anchor)
            positive = self.transform(positive)
            negative = self.transform(negative)

        return anchor, positive, negative

In [36]:
# Build the dataset without a transform, so items are returned as RGB PIL images.
# The relative path is resolved against the current working directory.
data = TripletFaceDataset(root_dir='Face Recognition Dataset')

In [37]:
# Smoke test: draw one (anchor, positive, negative) triplet anchored on the first identity.
a, p, n = data[0]

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# Hyperparameters for this training run
IMAGE_SIZE = 100
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 1e-5
TRIPLET_MARGIN = 0.5
EMBEDDING_DIM = 256

# Define transforms
# Resize(int) only fixes the shorter side, so non-square images would come out with
# different shapes and could not be stacked into a batch. CenterCrop makes every
# tensor IMAGE_SIZE x IMAGE_SIZE (a no-op for images that are already square).
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Initialize dataset and dataloader
# Point at the dataset folder used elsewhere in this notebook rather than a placeholder path.
dataset = TripletFaceDataset(root_dir="Face Recognition Dataset", transform=transform)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Initialize model, loss, and optimizer
# The model is moved to the target device before the optimizer is constructed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseResNet(embedding_dim=EMBEDDING_DIM).to(device)
criterion = TripletLoss(margin=TRIPLET_MARGIN)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
model.train()
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    num_batches = 0

    for anchor, positive, negative in dataloader:
        anchor = anchor.to(device)
        positive = positive.to(device)
        negative = negative.to(device)

        # Forward pass
        anchor_emb = model(anchor)
        positive_emb = model(positive)
        negative_emb = model(negative)

        # Compute loss
        loss = criterion(anchor_emb, positive_emb, negative_emb)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch instead of only the last batch's value;
    # max(..., 1) keeps the cell from failing when the dataloader yields no batches.
    epoch_loss = running_loss / max(num_batches, 1)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")