# Face Detection

In [1]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (

In [2]:
from facenet_pytorch import MTCNN, InceptionResnetV1

In [3]:
help(MTCNN)

Help on class MTCNN in module facenet_pytorch.models.mtcnn:

class MTCNN(torch.nn.modules.module.Module)
 |  MTCNN(image_size=160, margin=0, min_face_size=20, thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, select_largest=True, selection_method=None, keep_all=False, device=None)
 |  
 |  MTCNN face detection module.
 |  
 |  This class loads pretrained P-, R-, and O-nets and returns images cropped to include the face
 |  only, given raw input images of one of the following types:
 |      - PIL image or list of PIL images
 |      - numpy.ndarray (uint8) representing either a single image (3D) or a batch of images (4D).
 |  Cropped faces can optionally be saved to file
 |  also.
 |  
 |  Keyword Arguments:
 |      image_size {int} -- Output image size in pixels. The image will be square. (default: {160})
 |      margin {int} -- Margin to add to bounding box, in terms of pixels in the final image. 
 |          Note that the application of the margin differs slightly from the 

In [4]:
# If required, create a face detection pipeline using MTCNN:
mtcnn = MTCNN()

# Create an inception resnet (in eval mode):
resnet = InceptionResnetV1(pretrained='vggface2').eval()

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [5]:
from PIL import Image

img = Image.open("group_photo.jpg")

# Get cropped and prewhitened image tensor
img_cropped = mtcnn(img, save_path="group_photo_saved.jpg")

# Calculate embedding (unsqueeze to add batch dimension)
img_embedding = resnet(img_cropped.unsqueeze(0))

# Or, if using for VGGFace2 classification
resnet.classify = True
img_probs = resnet(img_cropped.unsqueeze(0))

# Custom face comparison using pretrained resnet18

In [6]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torch.nn.functional as F
import torchvision.models as models

In [7]:
class FaceNet(torch.nn.Module):
    def __init__(self):
        super(FaceNet, self).__init__()
        # Define the architecture here (e.g., CNN layers followed by fully connected layers)
        self.cnn_layers = torch.nn.Sequential(
            *list(models.resnet18(pretrained=True).children())[:-2]  # Remove last 2 layers (avgpool and fc)
        )
        # Modify the fully connected layer to output embeddings of desired size
        self.fc = torch.nn.Linear(512*7*7, 128)  # Adjust input size

    def forward(self, x):
        # Forward pass through the CNN layers
        x = self.cnn_layers(x)
        # Flatten the output for the fully connected layer
        x = torch.flatten(x, 1)
        # Forward pass through the fully connected layer
        x = self.fc(x)
        # Apply L2 normalization to the embeddings
        x = F.normalize(x, p=2, dim=1)
        return x

In [8]:
# Step 2: Preprocess the images
def preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize the image to fit the input size of the model
        transforms.ToTensor(),           # Convert image to tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize image
    ])
    image = transform(image)
    return image.unsqueeze(0)  # Add batch dimension

# Step 3: Generate embeddings for the images
def generate_embedding(image_path, model):
    image_tensor = preprocess_image(image_path)
    with torch.no_grad():
        embedding = model(image_tensor)
    return embedding

# Step 4: Calculate similarity between embeddings
def calculate_similarity(embedding1, embedding2):
    # Calculate cosine similarity between the embeddings
    similarity = F.cosine_similarity(embedding1, embedding2)
    return similarity.item()

In [9]:
# Load FaceNet model
face_net_model = FaceNet()

# Load images and generate embeddings
image1_path = "Shah_Rukh_Khan.115.jpg"
image2_path = "Shah_Rukh_Khan.131.jpg"
embedding1 = generate_embedding(image1_path, face_net_model)
embedding2 = generate_embedding(image2_path, face_net_model)

# Calculate similarity between embeddings
similarity_score = calculate_similarity(embedding1, embedding2)
print("Similarity score between the images:", similarity_score)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 50.3MB/s]


Similarity score between the images: 0.5617234706878662
