In [1]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

In [2]:
model = YOLO("yolov8n-face.pt")

In [10]:
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    raise RuntimeError("Could not open webcam")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLO inference
    results = model(frame, conf=0.5, verbose=False)

    # Draw detections
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(
                frame,
                f"Face {conf:.2f}",
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 255, 0),
                2
            )

    cv2.imshow("YOLO Face Detection", frame)

    # Press ESC to exit
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

In [3]:
import torch
from facenet_pytorch import InceptionResnetV1

device = "cuda" if torch.cuda.is_available() else "cpu"

facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)


100.0%
  state_dict = torch.load(cached_file)


In [4]:
import os
import cv2
import numpy as np
import torch

def get_face_embedding(face_img):
    face = cv2.resize(face_img, (160, 160))
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face = torch.tensor(face).permute(2, 0, 1).float() / 255.0
    face = face.unsqueeze(0).to(device)

    with torch.no_grad():
        embedding = facenet(face)

    return embedding.cpu().numpy()[0]


known_embeddings = []
known_names = []

base_path = "known_faces"

for person_name in os.listdir(base_path):
    person_dir = os.path.join(base_path, person_name)

    for img_name in os.listdir(person_dir):
        img_path = os.path.join(person_dir, img_name)
        img = cv2.imread(img_path)

        embedding = get_face_embedding(img)
        known_embeddings.append(embedding)
        known_names.append(person_name)

known_embeddings = np.array(known_embeddings)


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLO face detector
model = YOLO("yolov8n-face.pt")  # your YOLO face model

cap = cv2.VideoCapture(0)

THRESHOLD = 0.9  # lower = stricter matching

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame, conf=0.5)

    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            face_embedding = get_face_embedding(face_crop)

            # Compare
            distances = np.linalg.norm(known_embeddings - face_embedding, axis=1)
            min_dist = distances.min()

            if min_dist < THRESHOLD:
                name = known_names[distances.argmin()]
            else:
                name = "Unknown"

            cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0), 2)
            cv2.putText(frame, name, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

    cv2.imshow("Face Recognition", frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 1 face, 178.6ms
Speed: 14.8ms preprocess, 178.6ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 284.5ms
Speed: 8.6ms preprocess, 284.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 187.5ms
Speed: 3.6ms preprocess, 187.5ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 282.2ms
Speed: 4.0ms preprocess, 282.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 244.4ms
Speed: 4.6ms preprocess, 244.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 321.4ms
Speed: 4.3ms preprocess, 321.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 206.2ms
Speed: 3.5ms preprocess, 206.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 face, 129.6ms
Speed: 2.0ms preprocess, 129.6ms inference, 1.1ms postprocess per image at shape (1, 3, 4