In [1]:
# clone and install the required packages for the Face Recognition project
!git clone https://github.com/sathishkumar67/Face-Recognition-using-Resnet.git
!mv /kaggle/working/Face-Recognition-using-Resnet/* /kaggle/working/
!pip install --upgrade pip
!pip install -r requirements.txt
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Cloning into 'Face-Recognition-using-Resnet'...
remote: Enumerating objects: 78, done.[K
remote: Counting objects: 100% (78/78), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 78 (delta 35), reused 54 (delta 15), pack-reused 0 (from 0)[K
Receiving objects: 100% (78/78), 228.56 KiB | 9.52 MiB/s, done.
Resolving deltas: 100% (35/35), done.
Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.1.1
Collecting tensorflow==2.19.0 (from -r requirements.txt (line 1))
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [6]:
import os
import random
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from siamese_resnet.utils import unzip_file
from siamese_resnet.dataset import TripletDataset, TripletDatasetGenerator, triplet_collate_fn
from siamese_resnet.model import SiameseResNet
from siamese_resnet.loss import TripletLoss
from siamese_resnet.trainer import train_model

In [3]:
DATASET_REPO_ID = "pt-sk/Face_Recognition_Dataset"
DATA_ROOT = "face_recognition_with_cropped_faces_dataset"
DATASET_FILENAME_IN_REPO = f"{DATA_ROOT}.zip"
DATASET_REPO_TYPE = "dataset"
LOCAL_DIR = os.getcwd()
NUM_CORES = os.cpu_count()
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [4]:
# Download the dataset from Hugging Face Hub
hf_hub_download(repo_id=DATASET_REPO_ID, filename=DATASET_FILENAME_IN_REPO, repo_type=DATASET_REPO_TYPE, local_dir=LOCAL_DIR)

# Unzip the dataset
unzip_file(os.path.join(LOCAL_DIR, DATASET_FILENAME_IN_REPO), LOCAL_DIR)

(…)cognition_with_cropped_faces_dataset.zip:   0%|          | 0.00/1.31G [00:00<?, ?B/s]

Unzipping: 100%|██████████| 1.31G/1.31G [00:14<00:00, 89.4MB/s]


Unzipped /kaggle/working/face_recognition_with_cropped_faces_dataset.zip to /kaggle/working
Removed zip file: /kaggle/working/face_recognition_with_cropped_faces_dataset.zip


In [None]:
def triplet_collate_fn(batch):
    """Optimized collate function for triplet face recognition
    Features:
    - Zero-copy numpy to tensor conversion
    - Batch-wise normalization (3x faster than per-image)
    - Channel-first conversion
    - FP16/FP32 compatibility
    """
    # Separate components
    anchors, positives, negatives = [], [], []
    
    for item in batch:
        anchors.append(item['anchor'])
        positives.append(item['positive'])
        negatives.append(item['negative'])
    
    def process_batch(imgs):
        """Process a batch of images (numpy arrays)"""
        # Convert to tensor (no copy)
        batch_tensor = torch.as_tensor(np.stack(imgs), dtype=torch.float32).permute(0, 3, 1, 2)  
        # Normalize to [0, 1] (no copy)
        # Note: This is a zero-copy operation, as the data is already in the range [0, 255]
        batch_tensor.div_(255)  # [0,1]
    
        return batch_tensor
    # check if all images are the same size
    
    
    anchors = process_batch(anchors)
    positives = process_batch(positives)
    negatives = process_batch(negatives)
    print(f"anchors shape: {anchors.shape}, positives shape: {positives.shape}, negatives shape: {negatives.shape}")
    return {
        'anchor': anchors,
        'positive': positives,
        'negative': negatives,
    }


In [21]:
# Initialize generator
generator = TripletDatasetGenerator(DATA_ROOT)

# Create splits
splits = generator.create_splits()
    
# Generate triplets for each split
train_triplets = generator.generate_triplets(splits['train'])
val_triplets = generator.generate_triplets(splits['val'])
test_triplets = generator.generate_triplets(splits['test'])

# Create datasets
train_dataset = TripletDataset(train_triplets)
val_dataset = TripletDataset(val_triplets)
test_dataset = TripletDataset(test_triplets)
    
# Create dataloaders (4 workers for optimal IO)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, 
                        num_workers=NUM_CORES, pin_memory=True, persistent_workers=True,
                        collate_fn=triplet_collate_fn, prefetch_factor=2, pin_memory_device='cuda:0', drop_last=True)

val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False,
                        num_workers=NUM_CORES, pin_memory=True, persistent_workers=True,
                        collate_fn=triplet_collate_fn, prefetch_factor=2, pin_memory_device='cuda:0')

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False,
                        num_workers=NUM_CORES, pin_memory=True, persistent_workers=True,
                        collate_fn=triplet_collate_fn, prefetch_factor=2, pin_memory_device='cuda:0')

In [22]:
from __future__ import annotations
import time
import torch
from tqdm import tqdm
import numpy as np


def train_model(model, device, train_loader, val_loader, criterion, optimizer, 
                epochs=50, patience=5, checkpoint_path='best_model.pth'):
    """
    Professional training loop with key features:
    - Validation with early stopping
    - Mixed precision training
    - Gradient clipping
    - Learning rate scheduling
    - Best checkpoint saving
    - Rich progress reporting
    - GPU memory optimization
    """
    # Initialize training state
    best_val_loss = np.inf
    epochs_no_improve = 0
    scaler = torch.amp.GradScaler(device=device)  # For mixed precision
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)
    history = {'train_loss': [], 'val_loss': [], 'lr': []}

    # Early stopping loop
    for epoch in range(epochs):
        start_time = time.time()
        model.train()
        train_loss = 0.0

        # Training phase with mixed precision
        with tqdm(train_loader, unit="batch", desc=f"Train Epoch {epoch+1}") as pbar:
            for batch in pbar:
                anchor, positive, negative = (t.to(device, non_blocking=True) 
                                            for t in batch.values())

                # Mixed precision forward
                with torch.autocast(device_type=f"{device.type}:{device.index}", dtype=torch.float16):
                    anchor_emb = model(anchor)
                    positive_emb = model(positive)
                    negative_emb = model(negative)
                    loss = criterion(anchor_emb, positive_emb, negative_emb)

                # Backward pass with gradient scaling
                scaler.scale(loss).backward()
                
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
                
                # Optimizer step
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)  # More efficient than zero_grad()

                # Update metrics
                batch_loss = loss.detach().item()
                train_loss += batch_loss * anchor.size(0)
                pbar.set_postfix(loss=batch_loss, lr=optimizer.param_groups[0]['lr'])

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.inference_mode():
            for batch in val_loader:
                anchor, positive, negative = (t.to(device, non_blocking=True) 
                                            for t in batch.values())
                # Forward pass
                anchor_emb = model(anchor)
                positive_emb = model(positive)
                negative_emb = model(negative)
                loss = criterion(anchor_emb, positive_emb, negative_emb)
                
                # Update metrics
                val_loss += loss.item() * anchor.size(0)

        # Calculate epoch metrics
        train_loss = train_loss / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        lr = optimizer.param_groups[0]['lr']
        
        # Update learning rate
        scheduler.step(val_loss)
        
        # Update history
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['lr'].append(lr)

        # Early stopping check
        if val_loss < best_val_loss - 1e-4:  # Minimum delta threshold
            best_val_loss = val_loss
            epochs_no_improve = 0
            # Save best checkpoint
            torch.save({
                'epoch': epoch+1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                "Train Loss": train_loss,
                'val_loss': val_loss,
            }, checkpoint_path)
        else:
            epochs_no_improve += 1

        # Epoch summary
        epoch_time = time.time() - start_time
        print(f"\nEpoch {epoch+1:03d} Summary:")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        print(f"Learning Rate: {lr:.2e} | Time: {epoch_time:.1f}s")
        print(f"Best Val Loss: {best_val_loss:.4f} | Patience Left: {patience-epochs_no_improve}")
        
        if epochs_no_improve >= patience:
            print(f"\nEarly stopping triggered after {epoch+1} epochs!")
            break

    return history, model


In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SiameseResNet(embedding_dim=256)
criterion = TripletLoss(margin=0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4, betas=(0.9, 0.999))

history, model = train_model(
    model=model.to(device),
    device=device,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=200,
    patience=20,
    checkpoint_path='best_face_model.pth'
)   

Train Epoch 1:   0%|          | 0/168 [00:00<?, ?batch/s]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])
anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])
anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])
anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])
anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])
anchors shape

Train Epoch 1:   1%|          | 1/168 [00:01<03:14,  1.16s/batch, loss=0.476, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   1%|          | 2/168 [00:01<01:36,  1.73batch/s, loss=0.53, lr=0.0003] 

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   2%|▏         | 3/168 [00:01<01:06,  2.48batch/s, loss=0.532, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   2%|▏         | 4/168 [00:01<00:53,  3.06batch/s, loss=0.619, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   3%|▎         | 5/168 [00:01<00:45,  3.60batch/s, loss=0.53, lr=0.0003] 

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   4%|▎         | 6/168 [00:02<00:38,  4.16batch/s, loss=0.388, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   5%|▍         | 8/168 [00:02<00:32,  4.95batch/s, loss=0.435, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   5%|▌         | 9/168 [00:02<00:30,  5.21batch/s, loss=0.5, lr=0.0003]  

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   7%|▋         | 11/168 [00:02<00:28,  5.56batch/s, loss=0.267, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   7%|▋         | 12/168 [00:03<00:27,  5.66batch/s, loss=0.424, lr=0.0003]

anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


Train Epoch 1:   9%|▉         | 15/168 [00:03<00:36,  4.14batch/s, loss=0.632, lr=0.0003]


anchors shape: torch.Size([32, 3, 224, 224]), positives shape: torch.Size([32, 3, 224, 224]), negatives shape: torch.Size([32, 3, 224, 224])


ValueError: Caught ValueError in DataLoader worker process 3.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_71/281572673.py", line 26, in triplet_collate_fn
    anchors = process_batch(anchors)
              ^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_71/281572673.py", line 20, in process_batch
    batch_tensor = torch.as_tensor(np.stack(imgs), dtype=torch.float32).permute(0, 3, 1, 2)
                                 ^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/numpy/core/shape_base.py", line 449, in stack
    raise ValueError('all input arrays must have the same shape')
ValueError: all input arrays must have the same shape


In [15]:
for batch in train_loader:
    anchor, positive, negative = batch.values()

ValueError: Caught ValueError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/siamese_resnet/dataset.py", line 42, in triplet_collate_fn
    'anchor': process_batch(anchors),
              ^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/siamese_resnet/dataset.py", line 34, in process_batch
    batch_tensor = torch.as_tensor(np.stack(imgs), dtype=torch.float32).permute(0, 3, 1, 2)
                                 ^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/numpy/core/shape_base.py", line 449, in stack
    raise ValueError('all input arrays must have the same shape')
ValueError: all input arrays must have the same shape


In [16]:
anchor.shape, positive.shape, negative.shape

(torch.Size([32, 3, 224, 224]),
 torch.Size([32, 3, 224, 224]),
 torch.Size([32, 3, 224, 224]))

In [None]:
# best_model = SiameseResNet(embedding_dim=256)
# best_model.load_state_dict(torch.load('best_face_model.pth', map_location="cpu", weights_only=True)["model_state_dict"])

<All keys matched successfully>

In [None]:
# best_model.to(device)
# best_model.eval()
# eval_loss_sum = 0.0
# with torch.inference_mode():
#     for batch in val_loader: 
#         anchor, positive, negative = (t.to(device, non_blocking=True) 
#                                     for t in batch.values())
        
#         # Forward pass
#         anchor_emb = best_model(anchor)
#         positive_emb = best_model(positive)
#         negative_emb = best_model(negative)
#         loss = criterion(anchor_emb, positive_emb, negative_emb)
        
#         # Update metrics
#         eval_loss_sum += loss.item() * anchor.size(0)
# eval_loss = eval_loss_sum / len(val_loader.dataset)
# print(f"Validation Loss: {eval_loss:.4f}")

Validation Loss: 0.3251


In [None]:
# # find test loss
# test_loss_sum = 0.0
# with torch.inference_mode():
#     for batch in test_loader:
#         anchor, positive, negative = (t.to(device, non_blocking=True) 
#                                     for t in batch.values())
        
#         # Forward pass
#         anchor_emb = best_model(anchor)
#         positive_emb = best_model(positive)
#         negative_emb = best_model(negative)
#         loss = criterion(anchor_emb, positive_emb, negative_emb)
        
#         # Update metrics
#         test_loss_sum += loss.item() * anchor.size(0)
# test_loss = test_loss_sum / len(test_loader.dataset)
# print(f"Test Loss: {test_loss:.4f}")

Test Loss: 0.3602
