# Face Recognition Model Fine-Tuning and Inference Guide

## Part 1: Organizing Images for Fine-Tuning

The dataset should follow the VGGFace2/ImageNet-style directory layout:



In [None]:
data/
└── my_faces/
    ├── person1/
    │   ├── image1.jpg
    │   ├── image2.jpg
    │   └── ...
    ├── person2/
    │   ├── image1.jpg
    │   ├── image2.jpg
    │   └── ...
    └── ...



Each person should have their own directory containing multiple face images. For best results:
- Include 5-10 images per person
- Use clear, well-lit images with different poses and expressions
- Images can be any size: before training, each image is resized to 512x512 and the detected face is cropped to 160x160 by MTCNN

## Part 2: Fine-Tuning and Saving the Model



In [None]:
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import numpy as np
import os

# Fine-tuning script: crop faces with MTCNN, fine-tune a VGGFace2-pretrained
# InceptionResnetV1 as a classifier over the people found in `data_dir`, and
# save the best and final checkpoints (weights + class mapping) to `output_dir`.

# 1. Define parameters
data_dir = 'data/my_faces'  # Change this to your dataset location
output_dir = 'models'       # Directory to save models
batch_size = 32
epochs = 8
workers = 0 if os.name == 'nt' else 8  # no worker processes on Windows

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# 2. Initialize device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# 3. Define MTCNN for face detection and alignment
mtcnn = MTCNN(
    image_size=160, margin=20, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

# 4. Create directory for cropped faces
cropped_dir = data_dir + '_cropped'
os.makedirs(cropped_dir, exist_ok=True)

# 5. Prepare dataset for face extraction
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
# Replace each label with the path the cropped face should be written to.
# Only the leading occurrence of `data_dir` is swapped, so a path that happens
# to contain the same text again further down is left intact.
dataset.samples = [
    (p, p.replace(data_dir, cropped_dir, 1))
    for p, _ in dataset.samples
]

# 6. Create data loader for face extraction
loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

# 7. Extract and save faces
print('Extracting faces...')
for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print(f'\rBatch {i+1} of {len(loader)}', end='')
print('\nFace extraction complete')

# Drop the detector; it is not needed for training
del mtcnn

# 8. Define data transforms for training
trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

# 9. Create dataset for training from the cropped faces
train_dataset = datasets.ImageFolder(cropped_dir, transform=trans)
print(f"Classes: {train_dataset.class_to_idx}")

# People for whom no face crop was written do not appear in the cropped
# dataset. Report them, because they will not be part of the trained model.
missing_classes = sorted(set(dataset.class_to_idx) - set(train_dataset.class_to_idx))
if missing_classes:
    print(f'Warning: no cropped faces found for: {missing_classes}')

# 10. Initialize the Inception Resnet V1 model for fine-tuning
# The class count comes from the *cropped* dataset: that is the mapping the
# labels are drawn from and the mapping stored in the checkpoint, so the
# classifier head always matches len(checkpoint['class_to_idx']).
print('Initializing InceptionResnetV1 model...')
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',  # Start with pretrained weights
    num_classes=len(train_dataset.class_to_idx)  # Set number of output classes
).to(device)

# 11. Define optimizer and learning rate scheduler
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

# 12. Split indices for training and validation (80/20, reshuffled every run)
img_inds = np.arange(len(train_dataset))
np.random.shuffle(img_inds)
split_at = int(0.8 * len(img_inds))
train_inds = img_inds[:split_at]
val_inds = img_inds[split_at:]

# 13. Create data loaders for training and validation
train_loader = DataLoader(
    train_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    train_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

# 14. Define loss function and metrics
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

# 15. Initialize TensorBoard writer
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10


def _save_checkpoint(filename, epoch, accuracy):
    """Write weights, class mapping, epoch and accuracy to `output_dir/filename`."""
    torch.save(
        {
            'model_state_dict': resnet.state_dict(),
            'class_to_idx': train_dataset.class_to_idx,
            'epoch': epoch,
            'accuracy': accuracy
        },
        os.path.join(output_dir, filename)
    )


# 16. Initial evaluation
print('\nInitial evaluation')
print('-' * 10)
resnet.eval()
# No gradients are needed for evaluation; skipping them saves memory.
with torch.no_grad():
    val_loss, val_metrics = training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )
# Plain float, so it is defined even when `epochs` is 0 and stays a simple
# Python number inside the checkpoint.
val_acc = float(val_metrics['acc'])

# 17. Training loop
best_acc = 0.0
for epoch in range(epochs):
    print(f'\nEpoch {epoch+1}/{epochs}')
    print('-' * 10)

    # Train
    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    # Evaluate
    resnet.eval()
    with torch.no_grad():
        val_loss, val_metrics = training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )
    val_acc = float(val_metrics['acc'])

    # Save checkpoint if accuracy improved
    if val_acc > best_acc:
        best_acc = val_acc
        _save_checkpoint('facenet_best.pth', epoch, val_acc)
        print(f"Saved model with accuracy: {val_acc:.4f}")

# Save final model (state after the last epoch, not necessarily the best one)
_save_checkpoint('facenet_final.pth', epochs, val_acc)
print(f"Training complete. Final model saved with accuracy: {val_acc:.4f}")

# Close TensorBoard writer
writer.close()



## Part 3: Complete Inference Pipeline



In [None]:
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import numpy as np

class FaceRecognition:
    """Detect, embed and classify faces with a fine-tuned InceptionResnetV1.

    The checkpoint at ``model_path`` must be a dict holding
    ``'model_state_dict'`` and ``'class_to_idx'``. Two copies of the network
    are kept: ``resnet_embed`` returns embedding vectors and
    ``resnet_classify`` returns per-class logits.

    Aligned face tensors use facenet_pytorch's fixed image standardization,
    ``(pixel - 127.5) / 128``, which is what MTCNN emits with
    ``post_process=True``.
    """

    def __init__(self, model_path, device=None):
        """Load the detector and both network heads.

        Args:
            model_path: Path to the checkpoint file.
            device: Optional torch device (or device string). Defaults to
                ``cuda:0`` when CUDA is available, otherwise ``cpu``.
        """
        # Initialize device
        if device is None:
            device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        print(f'Using device: {self.device}')

        # Initialize MTCNN for face detection and alignment
        self.mtcnn = MTCNN(
            image_size=160, margin=20, min_face_size=20,
            thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
            device=self.device
        )

        # Load the fine-tuned model
        print(f'Loading model from {model_path}')
        # NOTE(security): torch.load unpickles the file. Only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(model_path, map_location=self.device)
        state_dict = checkpoint['model_state_dict']
        class_to_idx = checkpoint['class_to_idx']

        # Option 1: Use for embeddings (for comparing faces).
        # strict=False because this network has no classifier layer, so the
        # classifier weights in the checkpoint have nowhere to go.
        self.resnet_embed = InceptionResnetV1(pretrained=None).eval().to(self.device)
        self.resnet_embed.load_state_dict(state_dict, strict=False)

        # Option 2: Use for classification (for identifying specific people)
        num_classes = len(class_to_idx)
        self.resnet_classify = InceptionResnetV1(
            classify=True,
            pretrained=None,
            num_classes=num_classes
        ).eval().to(self.device)
        self.resnet_classify.load_state_dict(state_dict)

        # Keep class mapping (index -> name) for decoding predictions
        self.idx_to_class = {v: k for k, v in class_to_idx.items()}
        print(f'Loaded model trained for {num_classes} classes')

    @staticmethod
    def _to_uint8_image(aligned_img):
        """Undo the ``(pixel - 127.5) / 128`` standardization.

        Returns a uint8 tensor of the same shape with values in 0..255, on the
        CPU, suitable for saving or displaying.
        """
        restored = aligned_img.detach().cpu() * 128.0 + 127.5
        return restored.round().clamp(0, 255).to(torch.uint8)

    def _prepare_input(self, aligned_img):
        """Return ``aligned_img`` as a 1-image batch on ``self.device``.

        Tensors are assumed to be standardized already (as returned by
        ``align_face``). Anything else is treated as an image with 0..255
        pixel values in height x width x channel layout (a PIL image or an
        array) and is standardized here so it matches what the network saw
        during training.
        """
        if not isinstance(aligned_img, torch.Tensor):
            if hasattr(aligned_img, 'convert'):
                # PIL image: force three channels
                aligned_img = aligned_img.convert('RGB')
            pixels = np.array(aligned_img, dtype=np.float32)
            aligned_img = torch.from_numpy(pixels).permute(2, 0, 1)
            aligned_img = (aligned_img - 127.5) / 128.0

        # Add batch dimension and move to device
        return aligned_img.unsqueeze(0).to(self.device)

    def align_face(self, img_path, save_aligned=False):
        """Detect and align the face in the image at ``img_path``.

        Args:
            img_path: Path of the image file to read.
            save_aligned: When true, also write the crop next to the source
                image as ``<name>_aligned.jpg``.

        Returns:
            The standardized face tensor, or ``None`` if no face is detected.
        """
        # Load image; convert so RGBA / grayscale / palette files reach the
        # detector as three-channel RGB, and close the file right away.
        with Image.open(img_path) as source:
            img = source.convert('RGB')

        # Detect face and get probability
        aligned_img, prob = self.mtcnn(img, return_prob=True)

        if aligned_img is None:
            print('No face detected')
            return None

        print(f'Face detected with probability: {prob:.8f}')

        if save_aligned:
            import os

            # splitext only strips a real file extension, so dots in directory
            # names are left alone.
            aligned_path = os.path.splitext(os.fspath(img_path))[0] + '_aligned.jpg'
            # The tensor is standardized, so map it back to 0..255 before
            # writing it out as an image.
            pixels = self._to_uint8_image(aligned_img).permute(1, 2, 0).contiguous()
            Image.fromarray(pixels.numpy()).save(aligned_path)
            print(f'Saved aligned face to {aligned_path}')

        return aligned_img

    def get_embedding(self, img_path=None, aligned_img=None):
        """Get the embedding vector for an image path or an aligned face.

        Args:
            img_path: Image file to detect a face in; used only when
                ``aligned_img`` is not given.
            aligned_img: Already aligned face (see ``_prepare_input``).

        Returns:
            A flat NumPy array, or ``None`` when there is no face to embed.
        """
        if aligned_img is None and img_path is not None:
            aligned_img = self.align_face(img_path)

        if aligned_img is None:
            return None

        batch = self._prepare_input(aligned_img)

        # Get embedding
        with torch.no_grad():
            embedding = self.resnet_embed(batch)

        return embedding.cpu().numpy().flatten()

    def classify_face(self, img_path=None, aligned_img=None, threshold=0.8):
        """Classify a face from an image path or an aligned face.

        Args:
            img_path: Image file to detect a face in; used only when
                ``aligned_img`` is not given.
            aligned_img: Already aligned face (see ``_prepare_input``).
            threshold: Minimum softmax probability required to report a name.

        Returns:
            ``(name, confidence)``; ``("unknown", confidence)`` when the top
            probability is below ``threshold``; ``(None, None)`` when there is
            no face to classify.
        """
        if aligned_img is None and img_path is not None:
            aligned_img = self.align_face(img_path)

        if aligned_img is None:
            return None, None

        batch = self._prepare_input(aligned_img)

        # Get classification
        with torch.no_grad():
            logits = self.resnet_classify(batch)
            probs = torch.nn.functional.softmax(logits, dim=1)

        # Get prediction and confidence
        confidence, idx = torch.max(probs, dim=1)
        confidence = confidence.item()
        idx = idx.item()

        if confidence < threshold:
            return "unknown", confidence

        predicted_class = self.idx_to_class[idx]
        return predicted_class, confidence

# Usage example: detect the face once, then reuse the aligned tensor for the
# embedding, the classification and the preview.
if __name__ == "__main__":
    from torchvision import transforms
    import matplotlib.pyplot as plt

    # Initialize face recognition with trained model
    face_rec = FaceRecognition("models/facenet_best.pth")

    image_path = "path/to/test_image.jpg"

    # Run detection a single time; align_face returns None (and reports it)
    # when the image contains no detectable face.
    aligned_face = face_rec.align_face(image_path)

    if aligned_face is not None:
        # Example 1: Get face embedding
        embedding = face_rec.get_embedding(aligned_img=aligned_face)
        print(f"Embedding shape: {embedding.shape}")

        # Example 2: Classify face
        identity, confidence = face_rec.classify_face(aligned_img=aligned_face)
        print(f"Identity: {identity}, Confidence: {confidence:.4f}")

        # Visualize the aligned face. The tensor is standardized as
        # (pixel - 127.5) / 128, so map it back to 0..255 bytes first;
        # otherwise the preview colours are wrong.
        preview = (aligned_face.detach().cpu() * 128.0 + 127.5).round().clamp(0, 255).byte()
        plt.figure(figsize=(5, 5))
        plt.imshow(transforms.ToPILImage()(preview))
        plt.title(f"Aligned Face: {identity}" if identity else "Aligned Face")
        plt.axis('off')
        plt.show()



## Important Notes:

1. **Image Guidelines**:
   - Use clear, frontal face images
   - Include 5-10 varied images per person for best results
   - Ensure proper lighting and minimal occlusion

2. **Fine-Tuning Parameters**:
   - Increase `epochs` (e.g., 20-30) for better accuracy with larger datasets
   - Adjust `batch_size` based on your GPU memory
   - The default margin of 20 pixels provides context around the face

3. **Inference Options**:
   - Use `get_embedding()` to extract feature vectors for comparison
   - Use `classify_face()` to identify specific people from your training set
   - Adjust the confidence `threshold` of `classify_face()` (default 0.8) based on your needs: a higher value gives fewer false positives but reports more faces as "unknown"

4. **Performance Tips**:
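   - For deployment, run face detection once per image with `align_face()` and pass the result as `aligned_img=` to `get_embedding()` and `classify_face()`, instead of letting each call detect the face again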
   - Use smaller batch sizes if running on CPU
   - Consider quantizing the model for faster inference

Would you like me to explain any specific part of this pipeline in more detail?

Similar code found with 1 license type