In [9]:
# Import necessary libraries
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import cv2
import time
import os

In [10]:
# Build the two MTCNN face detectors from a single recipe. They differ only in
# `keep_all`: `mtcnn0` is constructed with keep_all=False and `mtcnn` with
# keep_all=True. Tuple order below follows the order of the (False, True) values.
mtcnn0, mtcnn = (
    MTCNN(image_size=240, margin=0, keep_all=keep_all, min_face_size=40)
    for keep_all in (False, True)
)

# InceptionResnetV1 with the VGGFace2 pretrained weights, switched to eval mode
# so that it is used for inference rather than training.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet.eval()

In [11]:
# Read the labelled photos with torchvision's ImageFolder: every subdirectory
# of 'photos' is one class, and its images are the samples of that class.
dataset = datasets.ImageFolder('photos')

# Invert class_to_idx (label -> index) into index -> label so that the integer
# targets yielded by the dataset can be turned back into names.
idx_to_class = dict(zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys()))

def collate_fn(x):
    """Return the first element of the batch `x` unchanged.

    Passed to the DataLoader as its `collate_fn`, so the loader yields the
    first sample of each batch as-is instead of a collated batch.
    """
    first_sample = x[0]
    return first_sample

# Creating a DataLoader for the dataset; collate_fn hands back the first
# sample of each batch, so every iteration yields one (image, class index) pair.
loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = []  # List to store names corresponding to cropped photos
embedding_list = []  # List to store embedding matrices after conversion from cropped faces using resnet

# Processing each image in the dataset.
# This is pure inference, so run it under no_grad: without it every forward
# pass records an autograd graph that is thrown away by the detach() below.
with torch.no_grad():
    for img, idx in loader:
        face, prob = mtcnn0(img, return_prob=True)
        # Keep only images where a face was found with high confidence
        if face is not None and prob > 0.92:
            emb = resnet(face.unsqueeze(0))  # unsqueeze adds the batch dimension
            embedding_list.append(emb.detach())
            name_list.append(idx_to_class[idx])

if not embedding_list:
    # An empty gallery makes recognition impossible; tell the user why.
    print("Warning: no faces were detected in 'photos'; 'data.pt' will contain no embeddings")

# Saving the training data (embedding vectors and corresponding labels) to 'data.pt' file
data = [embedding_list, name_list]
torch.save(data, 'data.pt')

In [13]:
# Using webcam to recognize faces in real-time.
# NOTE(review): torch.load unpickles the file, so only load a 'data.pt' that
# was produced by this notebook (never one from an untrusted source).
load_data = torch.load('data.pt')
embedding_list = load_data[0]
name_list = load_data[1]

# Initializing the webcam
cam = cv2.VideoCapture(1)

# try/finally guarantees the camera and the OpenCV windows are released even
# if something inside the loop raises.
try:
    while True:
        # Reading a frame from the webcam
        ret, frame = cam.read()
        if not ret:
            print("Failed to grab frame, try again")
            break

        # Keep a clean copy of every frame before anything is drawn on it, so
        # the spacebar can always save an unannotated image (previously this
        # was only assigned after a face had been detected).
        original_frame = frame.copy()

        # Converting the frame to PIL format for face detection
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_cropped_list, prob_list = mtcnn(img, return_prob=True)

        if img_cropped_list is not None:
            # Second detection pass, used only to obtain the bounding boxes
            boxes, _ = mtcnn.detect(img)

            for i, prob in enumerate(prob_list):
                if prob > 0.90:
                    box = boxes[i]

                    # Only attempt identification when there is a gallery to
                    # compare against; min() on an empty list would raise.
                    if embedding_list:
                        with torch.no_grad():  # inference only
                            emb = resnet(img_cropped_list[i].unsqueeze(0)).detach()

                        # Calculating distances to identify the person
                        dist_list = [torch.dist(emb, emb_db).item() for emb_db in embedding_list]

                        min_dist = min(dist_list)
                        min_dist_idx = dist_list.index(min_dist)
                        name = name_list[min_dist_idx]

                        # Label the face only when the closest match is near enough
                        if min_dist < 0.90:
                            org = (int(box[0]), int(box[1]) - 10)  # Adjusting the text position
                            frame = cv2.putText(frame, f"{name} {min_dist:.2f}", org,
                                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)

                    frame = cv2.rectangle(frame, (int(box[0]), int(box[1])),
                                          (int(box[2]), int(box[3])), (255, 0, 0), 2)

        # Displaying the frame with recognized faces
        cv2.imshow("IMG", frame)

        # Mask to the low byte so key codes compare correctly on every platform
        k = cv2.waitKey(1) & 0xFF
        if k == 27:  # Pressing the ESC key to exit the loop
            print('Esc pressed, closing...')
            break

        elif k == 32:  # Pressing the spacebar to save an image
            print('Enter your name:')
            name = input().strip()
            if not name:
                # An empty name would write straight into 'photos/' itself
                print('No name entered, photo not saved')
                continue

            # Creating a directory if it doesn't exist
            os.makedirs(os.path.join('photos', name), exist_ok=True)

            # Saving the image with the entered name and timestamp.
            # The frame from VideoCapture is already BGR, which is what
            # imwrite expects; converting to RGB first would swap red and blue.
            img_name = "photos/{}/{}.jpg".format(name, int(time.time()))
            if cv2.imwrite(img_name, original_frame):
                print("Saved: {}".format(img_name))
            else:
                print("Failed to save: {}".format(img_name))
finally:
    # Releasing resources (closing the webcam and destroying OpenCV windows)
    cam.release()
    cv2.destroyAllWindows()

Esc pressed, closing...
