In [1]:
# importing libraries
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import os
import cv2
from ultralytics import YOLO
import cv2
from facenet_pytorch import MTCNN, InceptionResnetV1
import numpy as np
from torchvision.transforms import functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the face dataset from the ID-card (KTP) photos.
# The KTP photos have already been renamed to the format: person_name.jpg
ktp_dir = 'dataset/ktp/'
faces_dir = 'dataset/faces-close/'
torch_dir = 'dataset/torch/'

# cv2.imwrite reports failure through its return value instead of raising,
# so a missing output directory would silently drop every crop.
os.makedirs(faces_dir, exist_ok=True)
os.makedirs(torch_dir, exist_ok=True)

filenames = os.listdir(ktp_dir)
path = [os.path.join(ktp_dir, name) for name in filenames]
# splitext only strips the extension, so dots inside a person's name survive
# ("Dr. Budi.jpg" -> "Dr. Budi" rather than "Dr").
filename_no_ext = [os.path.splitext(name)[0] for name in filenames]

yolo = YOLO('yolov8n-face.pt') # initializing yolo model

# Stage 1: detect the face on every card and save a 160x160 crop named after
# the person. Images are processed one at a time instead of being held in a list.
for image_path, person in zip(path, filename_no_ext):
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable or non-image files.
        print('Skipping unreadable file: ' + image_path)
        continue

    boxes = yolo(image)[0].boxes # detecting faces
    if len(boxes) == 0:
        print('No face detected in: ' + image_path)
        continue

    # Only one crop is stored per card. Previously every detection was written
    # to the same file, so whichever box came last won; keep the most
    # confident detection instead.
    best = int(boxes.conf.argmax())
    x1, y1, x2, y2 = (int(v) for v in boxes.xyxy[best].tolist())

    # Clamp to the image so a negative coordinate cannot wrap around in the
    # slice, and skip degenerate boxes that would make cv2.resize fail.
    height, width = image.shape[:2]
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, width), min(y2, height)
    if x2 <= x1 or y2 <= y1:
        print('Skipping empty face box in: ' + image_path)
        continue

    # resize face to 160x160
    face = cv2.resize(image[y1:y2, x1:x2], (160, 160))
    cv2.imwrite(os.path.join(faces_dir, person + '.jpg'), face)

# Stage 2: make dataset torch format, i.e. dataset/torch/<label>/<index>.jpg
# where <label> is the crop's file name without extension and <index> is the
# crop's position in the directory listing.
# NOTE(review): because the index depends on the listing order, re-running
# after adding/removing cards can leave stale duplicates in a label folder.
for i, name in enumerate(os.listdir(faces_dir)):
    face = cv2.imread(os.path.join(faces_dir, name))
    if face is None:
        # Not an image (or unreadable): do not create an empty class folder.
        continue
    label_dir = os.path.join(torch_dir, os.path.splitext(name)[0])
    os.makedirs(label_dir, exist_ok=True)
    cv2.imwrite(os.path.join(label_dir, str(i) + '.jpg'), face)



0: 448x640 1 face, 192.2ms
Speed: 5.2ms preprocess, 192.2ms inference, 127.5ms postprocess per image at shape (1, 3, 448, 640)

0: 416x640 1 face, 184.5ms
Speed: 2.2ms preprocess, 184.5ms inference, 1.8ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 face, 9.8ms
Speed: 2.3ms preprocess, 9.8ms inference, 1.6ms postprocess per image at shape (1, 3, 416, 640)

0: 384x640 1 face, 178.8ms
Speed: 2.2ms preprocess, 178.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 416x640 1 face, 10.3ms
Speed: 1.7ms preprocess, 10.3ms inference, 1.6ms postprocess per image at shape (1, 3, 416, 640)

0: 384x640 1 face, 10.3ms
Speed: 2.1ms preprocess, 10.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 10.3ms
Speed: 2.9ms preprocess, 10.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 448x640 1 face, 13.2ms
Speed: 2.5ms preprocess, 13.2ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)

0

In [3]:
# Embedding network: InceptionResnetV1 with the 'vggface2' weights, switched to
# eval mode; it converts a face image into an embedding.
resnet = InceptionResnetV1(pretrained='vggface2').eval()

dataset = datasets.ImageFolder('dataset/torch/') # photos folder path
# Invert class_to_idx so a class index maps back to its folder (person) name.
idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}

def collate_fn(x):
    """Return the first sample of the batch unchanged.

    The loader below is built without a batch_size argument, so each batch
    holds a single (image, class_index) sample; returning x[0] hands that
    sample to the loop without any tensor collation.
    """
    return x[0]

loader = DataLoader(dataset, collate_fn=collate_fn)

face_list = [] # list of cropped faces from photos folder (currently never filled)
name_list = [] # list of names corresponding to the cropped photos
embedding_list = [] # list of embeddings produced by resnet, parallel to name_list

# Inference only: no_grad avoids building an autograd graph for every image.
with torch.no_grad():
    for img, idx in loader:
        # NOTE(review): the image is passed as float32 without any rescaling or
        # standardisation. Check this against the input range the pretrained
        # weights expect; if it is changed, every place that embeds a query
        # face must be changed in the same way.
        face = F.to_tensor(np.float32(img))
        emb = resnet(face.unsqueeze(0)) # add a batch dimension and embed the face
        embedding_list.append(emb) # no detach needed: created under no_grad
        name_list.append(idx_to_class[idx]) # names are stored in a list

In [4]:
# Bundle the embeddings with their names (two parallel lists) and write the
# pair to 'data_juga.pt'.
data = [embedding_list, name_list]
torch.save(obj=data, f='data_juga.pt')

In [6]:
# Live recognition: detect faces in webcam frames, embed each crop and label
# it with the closest entry of embedding_list / name_list.
yolo = YOLO('yolov8n-face.pt') # initializing yolo model

# A face is accepted as a match when its smallest distance is below this.
# NOTE(review): if the embeddings are unit-length, distances cannot exceed 2,
# so 2.0 would accept practically every face - confirm the intended value.
MATCH_THRESHOLD = 2.0

cap = cv2.VideoCapture(0) # capturing video from webcam
try:
    while True:
        ok, frame = cap.read() # reading frame from webcam
        if not ok:
            # No frame delivered (camera missing, busy or disconnected):
            # stop instead of passing None to the detector.
            print('Could not read a frame from the webcam.')
            break

        boxes = yolo(frame)[0].boxes # detecting faces
        frame_h, frame_w = frame.shape[:2]
        for x1, y1, x2, y2 in boxes.xyxy.tolist():
            # Clamp to the frame so negative values cannot wrap around in the
            # slice, and skip boxes with no area (cv2.resize fails on those).
            top_left_x = max(int(x1), 0)
            top_left_y = max(int(y1), 0)
            bottom_right_x = min(int(x2), frame_w)
            bottom_right_y = min(int(y2), frame_h)
            if bottom_right_x <= top_left_x or bottom_right_y <= top_left_y:
                continue

            face = frame[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
            # resize face to 160x160
            face = cv2.resize(face, (160, 160))
            # OpenCV frames are BGR, while the reference embeddings come from
            # images loaded through torchvision's ImageFolder (RGB), so the
            # channel order is aligned before embedding.
            face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            # Embed the crop itself. The previous code passed the leftover
            # global `img` here, so every frame produced the same embedding.
            with torch.no_grad():
                emb = resnet(F.to_tensor(np.float32(face_rgb)).unsqueeze(0))

            # Distances to every known embedding; the minimum identifies the person.
            dist_list = [torch.dist(emb, emb_db).item() for emb_db in embedding_list]
            if not dist_list:
                # Nothing to compare against (embedding_list is empty).
                name = 'Unknown'
            else:
                best = min(dist_list)
                # The distance stays in the label to help tune the threshold.
                if best < MATCH_THRESHOLD:
                    name = '{} ({:.2f})'.format(name_list[dist_list.index(best)], best)
                else:
                    name = 'Unknown ({:.2f})'.format(best)

            cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0,255,0), 2)
            cv2.putText(frame, name, (top_left_x, top_left_y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): # press q to exit
            break
finally:
    # Always free the camera and close the window, even after an exception or
    # a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

    
    



0: 480x640 1 face, 11.5ms
Speed: 1.2ms preprocess, 11.5ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.5ms
Speed: 1.6ms preprocess, 9.5ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.8ms
Speed: 2.0ms preprocess, 9.8ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 10.1ms
Speed: 2.0ms preprocess, 10.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.3ms
Speed: 1.9ms preprocess, 9.3ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.9ms
Speed: 1.8ms preprocess, 9.9ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.5ms
Speed: 1.5ms preprocess, 9.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 9.5ms
Speed: 1.4ms preprocess, 9.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 

In [32]:
# Single-image check: run detection + recognition on one ID-card photo and
# show the annotated result. Relies on `yolo`, `resnet`, `embedding_list` and
# `name_list` created by earlier cells.
image_path = 'dataset/ktp/Antonius Natael.jpg'
frame = cv2.imread(image_path)
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError('Could not read image: ' + image_path)

boxes = yolo(frame)[0].boxes # detecting faces
frame_h, frame_w = frame.shape[:2]
for x1, y1, x2, y2 in boxes.xyxy.tolist():
    # Clamp to the image so negative values cannot wrap around in the slice,
    # and skip boxes with no area (cv2.resize fails on those).
    top_left_x = max(int(x1), 0)
    top_left_y = max(int(y1), 0)
    bottom_right_x = min(int(x2), frame_w)
    bottom_right_y = min(int(y2), frame_h)
    if bottom_right_x <= top_left_x or bottom_right_y <= top_left_y:
        continue

    face = frame[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
    # resize face to 160x160
    face = cv2.resize(face, (160, 160))
    # OpenCV images are BGR, while the reference embeddings come from images
    # loaded through torchvision's ImageFolder (RGB), so align the channels.
    face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    # Embed the crop itself. The previous code passed the leftover global
    # `img` here, so the detected face never influenced the result.
    with torch.no_grad():
        emb = resnet(F.to_tensor(np.float32(face_rgb)).unsqueeze(0))

    # Distances to every known embedding; the minimum identifies the person.
    dist_list = [torch.dist(emb, emb_db).item() for emb_db in embedding_list]
    # An empty embedding_list leaves nothing to match, so label as 'Unknown'.
    if dist_list and min(dist_list) < 1.0:
        name = name_list[dist_list.index(min(dist_list))]
    else:
        name = 'Unknown'

    cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0,255,0), 2)
    cv2.putText(frame, name, (top_left_x, top_left_y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)

cv2.imshow('frame', frame)
cv2.waitKey(0) # wait for any key press before closing the window
cv2.destroyAllWindows()



0: 416x640 1 face, 15.5ms
Speed: 11.5ms preprocess, 15.5ms inference, 2.1ms postprocess per image at shape (1, 3, 416, 640)
