# FACE RECOGNITION

This system uses continuous training with two models:

- a general model is used to recognize all people,
- a specific model is used to recognize only common faces.

Whenever a face achieves a similarity of 70% or higher, the specific model is retrained to improve accuracy for that face.

In [57]:
import torch
import torch.nn as nn
from torchvision import transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from PIL import Image
import cv2
import matplotlib.pyplot as plt

from sklearn.metrics.pairwise import cosine_similarity
import mediapipe as mp
mp_face_mesh = mp.solutions.face_mesh


import warnings
warnings.filterwarnings("ignore")

from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import dotenv
import os
dotenv.load_dotenv()

### DB INTEGRATION

In [None]:
# Database connection string, read from the process environment.
DB_URL = os.getenv("DB_URL")
if not DB_URL:
    # os.getenv returns None when the variable is unset; fail here with a
    # message that names the variable instead of failing inside create_engine.
    raise RuntimeError("DB_URL environment variable is not set")

# Shared SQLAlchemy objects: the engine, the declarative base that models
# inherit from, and a session factory bound to the engine.
engine = create_engine(DB_URL)
Base = declarative_base()
Session = sessionmaker(bind=engine)

class Faces(Base):
    """ORM model for the ``faces`` table: one row per known person."""

    __tablename__ = "faces"

    # Surrogate integer key (indexed).
    id = Column(Integer, index=True, primary_key=True)
    # Person name; required and unique across the table.
    name = Column(String, nullable=False, unique=True)
    # Required text column for the face embeddings.
    # NOTE(review): the serialization format is not shown in this file.
    embeddings = Column(String, nullable=False, unique=False)

# Issue CREATE TABLE for every model registered on Base (here: Faces),
# using the engine configured above. SQLAlchemy's create_all skips tables
# that already exist by default, so re-running this cell is harmless.
Base.metadata.create_all(bind=engine)

### TORCH PREPROCESS

In [58]:
def pth_processing(fp):
    """Turn a PIL image into the batched float tensor fed to the face model.

    Steps, in order: resize to 224x224 with nearest-neighbour resampling,
    convert to a channel-first tensor, cast to float32, reverse the channel
    axis, subtract a fixed offset from each of the first three channels, add
    a leading batch axis and move the result to the module-level ``device``.

    Args:
        fp: A PIL image (despite the name, not a file path).
            NOTE(review): assumes a 3-channel RGB image -- confirm at callers.

    Returns:
        A float32 tensor of shape (1, C, 224, 224) on ``device``.
    """
    resized = fp.resize((224, 224), Image.Resampling.NEAREST)
    pixels = transforms.PILToTensor()(resized).to(torch.float32)

    # Reverse the channel axis (RGB -> BGR for an RGB input).
    pixels = torch.flip(pixels, dims=(0,))

    # Per-channel offsets, applied in place to channels 0, 1, 2.
    # NOTE(review): presumably the BGR channel means the backbone was
    # trained with -- confirm against the backbone's training setup.
    for channel, offset in enumerate((91.4953, 103.8827, 131.0912)):
        pixels[channel] -= offset

    return pixels.unsqueeze(0).to(device)

### TRANSFORMER MODEL

In [59]:
class FaceTransformer(nn.Module):
    """Transformer head that turns backbone feature maps into a face embedding.

    The backbone's features are treated as a sequence of ``num_tokens``
    tokens with ``feature_dim`` channels each. Tokens are projected to
    ``d_model``, given a learned positional encoding, passed through a
    transformer encoder, flattened, and mapped to the final embedding.

    Args:
        backbone: Module exposing ``extract_features(x)``.
            NOTE(review): assumes its output can be reshaped to
            (batch, feature_dim, num_tokens), e.g. (batch, 512, 7, 7) --
            confirm against the backbone actually loaded.
        d_model: Width of the transformer tokens.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        feature_dim: Channel count of the backbone features (default 512).
        num_tokens: Number of spatial positions per image (default 49).
        embedding_dim: Size of the returned embedding (default 512).
    """

    def __init__(self, backbone, d_model=512, nhead=8, num_layers=3,
                 feature_dim=512, num_tokens=49, embedding_dim=512):
        super().__init__()
        self.backbone = backbone
        self.d_model = d_model
        # Plain ints, not parameters: the state_dict layout is unchanged.
        self.feature_dim = feature_dim
        self.num_tokens = num_tokens

        # Projection layer to adjust feature dimensions
        self.projection = nn.Linear(feature_dim, d_model)

        # Learned positional encoding, one vector per token.
        self.pos_encoder = nn.Parameter(torch.randn(1, num_tokens, d_model))
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model * num_tokens, embedding_dim)  # Final embedding size

    def forward(self, x):
        """Return a (batch, embedding_dim) embedding for the input batch ``x``.

        Raises:
            ValueError: If the backbone yields a token count other than
                ``num_tokens``.
        """
        features = self.backbone.extract_features(x)
        features = torch.nn.functional.relu(features)

        # Collapse trailing dimensions into one token axis. reshape (unlike
        # view) also accepts non-contiguous backbone output.
        features = features.reshape(features.size(0), self.feature_dim, -1)
        if features.size(2) != self.num_tokens:
            # Without this check a single-token feature map would silently
            # broadcast against the positional encoding.
            raise ValueError(
                f"expected {self.num_tokens} feature tokens from the backbone, "
                f"got {features.size(2)}"
            )

        # (batch, feature_dim, tokens) -> (batch, tokens, d_model)
        features = self.projection(features.permute(0, 2, 1))

        # Add positional encoding
        features = features + self.pos_encoder

        # The encoder is built with the default batch_first=False, so it
        # expects (tokens, batch, d_model).
        output = self.transformer_encoder(features.permute(1, 0, 2))

        # (tokens, batch, d_model) -> (batch, d_model, tokens) -> flat vector
        output = output.permute(1, 2, 0).flatten(1)
        return self.fc(output)

### MEDIAPIPE - DETECT FACE 

In [60]:
def detect_face(frame):
    """Find a face in a BGR frame and return its bounding box.

    A MediaPipe FaceMesh instance is created for the call, asked for at most
    one face, and closed again on exit. The box is the extent of the first
    detected face's landmarks, clamped to the frame.

    Args:
        frame: Image array in OpenCV BGR channel order; its first two
            dimensions are read as (height, width).

    Returns:
        ``(x_min, y_min, x_max, y_max)`` in pixel coordinates, or ``None``
        when no face is detected or the clamped box has no area.
    """
    height, width = frame.shape[:2]
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        results = face_mesh.process(frame_rgb)

        if not results.multi_face_landmarks:
            return None

        # max_num_faces=1, so only the first entry matters.
        landmarks = results.multi_face_landmarks[0].landmark
        xs = [int(point.x * width) for point in landmarks]
        ys = [int(point.y * height) for point in landmarks]

    if not xs:
        return None

    # Landmark coordinates are normalized to the frame but can fall outside
    # [0, 1] when the face is partly out of view. Clamp them: a negative
    # value would wrap around when the caller slices the image.
    x_min = min(max(min(xs), 0), width)
    y_min = min(max(min(ys), 0), height)
    x_max = min(max(max(xs), 0), width)
    y_max = min(max(max(ys), 0), height)

    if x_max <= x_min or y_max <= y_min:
        # Nothing left to crop after clamping.
        return None
    return (x_min, y_min, x_max, y_max)

### EXTRACT FEATURES

In [61]:
# Load the serialized TorchScript backbone. map_location lets a file saved
# on a CUDA device load on a CPU-only machine.
pth_backbone_model = torch.jit.load(
    'model/torchscript_model_0_66_49_wo_gl.pth', map_location=device
).to(device)
model = FaceTransformer(pth_backbone_model).to(device)
# Inference only: eval() turns off dropout in the transformer encoder layers.
# torch.no_grad() alone does not, so without this the same image would give
# a different embedding on every call.
model.eval()
# NOTE(review): no weights are loaded for the transformer head in this cell,
# so its parameters are randomly initialized unless they are loaded elsewhere.

def extract_features(image):
    """Compute the face embedding for the face found in a BGR image.

    Args:
        image: Image array in OpenCV BGR channel order.

    Returns:
        The model output for the cropped face as a NumPy array (one row,
        since a single face is processed), or ``None`` when no face is
        detected or the detected box yields an empty crop.
    """
    face = detect_face(image)
    if face is None:
        return None

    # Negative coordinates would wrap around in NumPy slicing and select the
    # wrong region, so clamp at zero. Upper bounds past the image edge are
    # already truncated by slicing.
    x1, y1, x2, y2 = (max(coord, 0) for coord in face)
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        # cv2.cvtColor raises on an empty array; treat it as "no usable face".
        return None

    face_img = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    face_tensor = pth_processing(face_img)
    with torch.no_grad():
        features = model(face_tensor)

    return features.cpu().numpy()

### COMPARE FACES

In [62]:
def compare_faces(img1, img2):
    """Return the cosine similarity between the faces in two BGR images.

    Both images are always run through ``extract_features``. If either one
    yields no embedding, the result is ``None``.
    """
    first, second = extract_features(img1), extract_features(img2)
    if first is not None and second is not None:
        # cosine_similarity returns a matrix; each input has one row, so the
        # score is its only entry.
        return cosine_similarity(first, second)[0][0]
    return None

In [69]:
# Demo: compare the faces in two sample photos.
image1 = cv2.imread("src/photo1.jpeg")
image2 = cv2.imread("src/photo2.jpeg")
if image1 is None or image2 is None:
    # cv2.imread returns None instead of raising when a file is missing or
    # unreadable; stop here rather than fail later inside OpenCV.
    raise FileNotFoundError(
        "Could not read src/photo1.jpeg and/or src/photo2.jpeg"
    )

# Bounding boxes of the detected faces. compare_faces runs its own detection,
# so these are not needed for the similarity below; kept in case other cells
# use them.
face1 = detect_face(image1)
face2 = detect_face(image2)
similarity = compare_faces(image1, image2)

print(f"\n\nFace similarity: {similarity}\n\n")



Face similarity: 0.8058371543884277




I0000 00:00:1723824533.420526 14719781 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1723824533.421757 14746633 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1723824533.423873 14746633 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1723824533.438512 14719781 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1723824533.439593 14746644 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1723824533.441646 14746645 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1723824533.454790 14719781 gl_