In [83]:
# Imports
import os
import yaml
import torch
import insightface
import numpy as np
import torch.nn as nn
from PIL import Image
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader, random_split

# Read the training hyper-parameters from config.yaml (expects a top-level
# "training" mapping containing the four keys unpacked below).
with open("config.yaml", 'r') as stream:
    config = yaml.safe_load(stream)

# Unpack in one statement; a missing key still raises KeyError, in the same
# order the individual look-ups would have.
batch_size, learning_rate, patience, min_delta = (
    config['training'][key]
    for key in ('batch_size', 'learning_rate', 'patience', 'min_delta')
)

In [84]:
data_dir = './data/'
train_ratio = 0.8
# Fixed seed so the train/test membership is identical across kernel restarts;
# without it the reported test accuracy is measured on a different subset each run.
split_seed = 42

# Resize every image to 112x112 and convert it to a float tensor in [0, 1].
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
])

# ImageFolder derives one class label per sub-directory of data_dir.
dataset = ImageFolder(root=data_dir, transform=transform)

# Whatever is not used for training becomes the test split, so the two sizes
# always add up to len(dataset) even when train_ratio does not divide evenly.
train_size = int(train_ratio * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")

Training samples: 8
Testing samples: 2


In [85]:
# Load the "buffalo_l" insightface model pack and run it on the CPU only.
model = insightface.app.FaceAnalysis(
    name="buffalo_l",
    providers=['CPUExecutionProvider'],
)
# ctx_id=-1 forces CPU mode
model.prepare(ctx_id=-1)

class ArcFaceFineTune(nn.Module):
    """Linear classification head applied to pre-computed face embeddings.

    The head does not run ``base_model`` itself: ``forward`` expects the
    embeddings to have been extracted already and only applies the linear
    layer. ``base_model`` is stored purely so the extractor travels with the
    head object.

    Args:
        base_model: Embedding extractor associated with this head. It is kept
            as an attribute and never called by this class.
        num_classes: Number of output logits (one per class).
        embedding_dim: Size of the last dimension of the embeddings passed to
            ``forward``. Defaults to 512, the embedding size printed by the
            notebook for the model it uses.
    """

    def __init__(self, base_model, num_classes=2, embedding_dim=512):
        super().__init__()
        self.base_model = base_model
        # The only trainable parameters; state_dict keys stay "fc.weight" / "fc.bias".
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of embeddings.

        Args:
            x: Float tensor whose last dimension equals ``embedding_dim``.

        Returns:
            Tensor of logits with last dimension ``num_classes``.
        """
        return self.fc(x)

# Initialize the classification head that sits on top of the ArcFace embeddings.
# Size the output layer from the class folders ImageFolder discovered instead of
# relying on the default of 2, so labels never fall outside the logit range.
fine_tune_model = ArcFaceFineTune(model, num_classes=len(dataset.classes)).to(torch.device("cpu"))

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/huginn/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/huginn/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/huginn/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/huginn/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/huginn/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
se

In [86]:
def extract_embeddings(model, img_tensor):
    """Detect a face in one image tensor and return its normalized embedding.

    Args:
        model: Object exposing ``get(image)`` that returns a sequence of
            detected faces, each with a ``normed_embedding`` attribute
            (here, the prepared insightface ``FaceAnalysis`` instance).
        img_tensor: Image tensor as produced by ``transforms.ToTensor()``.

    Returns:
        The ``normed_embedding`` of the first detected face, or ``None`` when
        no face is detected (a message is printed in that case).
    """
    img_pil = transforms.ToPILImage()(img_tensor).convert("RGB")

    # insightface follows the OpenCV convention and expects BGR channel order
    # (its examples load images with cv2.imread), whereas PIL yields RGB.
    # Reverse the channel axis; ascontiguousarray avoids handing a
    # negative-stride view to downstream OpenCV calls.
    img_bgr = np.ascontiguousarray(np.array(img_pil)[:, :, ::-1])

    # Run face detection and extract embeddings
    faces = model.get(img_bgr)

    if len(faces) == 0:
        print("No face detected in the image.")
        return None

    # Only the first detection is used; any additional faces are ignored.
    return faces[0].normed_embedding

# Example usage: extract the embedding of the first training image as a sanity check.
sample_image, _ = train_dataset[0]
sample_embedding = extract_embeddings(model, sample_image)
# extract_embeddings returns None (and prints a message) when no face is found,
# so only report the shape when an embedding actually came back.
if sample_embedding is not None:
    print(f"Embedding shape: {sample_embedding.shape}")

Embedding shape: (512,)


In [87]:
criterion = nn.CrossEntropyLoss()  # Use cross-entropy loss for classification
optimizer = optim.Adam(fine_tune_model.parameters(), lr=learning_rate)

# Training Loop
epochs = 10
train_losses = []
best_loss = float("inf")
epochs_without_improvement = 0

for epoch in range(epochs):
    fine_tune_model.train()
    running_loss = 0.0
    trained_batches = 0  # batches that actually contributed a loss value

    for inputs, labels in train_loader:
        # Remember WHICH images produced an embedding. Truncating the label
        # tensor to the number of embeddings pairs embeddings with the wrong
        # labels whenever detection fails on any image but the last ones.
        kept_idx, embeddings = [], []
        for idx, img_tensor in enumerate(inputs):
            embedding = extract_embeddings(model, img_tensor)
            if embedding is not None:
                kept_idx.append(idx)
                embeddings.append(embedding)

        if not embeddings:
            continue  # no face detected in the whole batch

        embeddings_tensor = torch.as_tensor(np.stack(embeddings), dtype=torch.float32)
        labels = labels[kept_idx]

        optimizer.zero_grad()
        outputs = fine_tune_model(embeddings_tensor)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        trained_batches += 1

    if trained_batches == 0:
        print("No faces detected in any training batch; stopping training.")
        break

    # Average over the batches that were trained on, not over skipped ones.
    avg_loss = running_loss / trained_batches
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

    # Early stopping: stop once the loss has failed to improve on the best
    # value by more than min_delta for `patience` consecutive epochs.
    if best_loss - avg_loss > min_delta:
        best_loss = avg_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

Epoch [1/10], Loss: 0.7007
Epoch [2/10], Loss: 0.7001
Early stopping at epoch 2


In [88]:
# Evaluate the trained head on the held-out split.
fine_tune_model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        # Track the positions of images with a detected face so predictions
        # are compared against the labels of those same images. Slicing the
        # first len(embeddings) labels would misalign them after any miss.
        kept_idx, embeddings = [], []
        for idx, img_tensor in enumerate(inputs):
            embedding = extract_embeddings(model, img_tensor)
            if embedding is not None:
                kept_idx.append(idx)
                embeddings.append(embedding)

        if not embeddings:
            continue  # nothing to score in this batch

        embeddings_tensor = torch.as_tensor(np.stack(embeddings), dtype=torch.float32)
        labels = labels[kept_idx]

        outputs = fine_tune_model(embeddings_tensor)
        predicted = outputs.argmax(dim=1)

        total += len(predicted)
        correct += (predicted == labels).sum().item()

# Images without a detected face are excluded from both numerator and
# denominator; guard against the case where nothing at all was scored.
if total == 0:
    accuracy = float("nan")
    print("No faces detected in the test set; accuracy is undefined.")
else:
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 100.00%


In [89]:
# Persist only the head's parameters (state_dict), not the insightface model.
save_path = './results/fine_tuned_arcface.pth'
# torch.save does not create missing parent directories, so do it up front.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(fine_tune_model.state_dict(), save_path)
print("Model saved successfully!")

Model saved successfully!
