In [2]:
!python -c "import cv2; print(cv2.__version__)"

4.11.0


In [None]:
!which python
!python -c "import sys; print(sys.executable)"


/Users/kadessovb02/Desktop/test_tasks/test-task-cef/.venv/bin/python
/Users/kadessovb02/Desktop/test_tasks/test-task-cef/.venv/bin/python


In [4]:
!pip install ipykernel
!python -m ipykernel install --user --name face-env --display-name "Python (face-env)"


Installed kernelspec face-env in /Users/kadessovb02/Library/Jupyter/kernels/face-env


In [20]:
from __future__ import annotations
import pathlib
import math
import logging
from typing import Sequence, Optional, List
import numpy as np
from fastapi import UploadFile, HTTPException, status
from dataclasses import dataclass
from src import PROJECT_ROOT
import cv2

In [23]:
@dataclass(slots=True)
class FaceIdentity:
    person_id: str
    embedding: Sequence[float]

    def __repr__(self) -> str:
        return f"<FaceIdentity {self.person_id[:20]}...>"
# Locations of the pre-trained model files, resolved relative to the project root.
face_detector_prototxt = PROJECT_ROOT / "src/ml_models/deploy.prototxt"
face_detector_model = PROJECT_ROOT / "src/ml_models/res10_300x300_ssd_iter_140000.caffemodel"
face_embedder_model = PROJECT_ROOT / "src/ml_models/nn4.small2.v1.t7"
print('*'*7)
print(face_detector_prototxt, face_detector_model, face_embedder_model)
print('*'*7)
# In-memory gallery of known identities.
faces: List[FaceIdentity] = []
# Paths are passed as str, matching the cv2.imread(str(...)) calls elsewhere in
# this notebook: OpenCV builds without path-like support reject pathlib.Path.
detector = cv2.dnn.readNetFromCaffe(
    str(face_detector_prototxt),
    str(face_detector_model),
)
embedder = cv2.dnn.readNetFromTorch(str(face_embedder_model))

*******
/Users/kadessovb02/Desktop/test_tasks/test-task-cef/src/ml_models/deploy.prototxt /Users/kadessovb02/Desktop/test_tasks/test-task-cef/src/ml_models/res10_300x300_ssd_iter_140000.caffemodel /Users/kadessovb02/Desktop/test_tasks/test-task-cef/src/ml_models/nn4.small2.v1.t7
*******


In [57]:
# Directory with the reference images that preload_faces() scans.
faces_db_path = PROJECT_ROOT.joinpath("faces_db")
print(faces_db_path)
# Start from an empty gallery; preload_faces() fills it.
faces: List[FaceIdentity] = list()


/Users/kadessovb02/Desktop/test_tasks/test-task-cef/faces_db


In [68]:
test_faces_db_path = PROJECT_ROOT / "tests/images"
test_faces_db_path

PosixPath('/Users/kadessovb02/Desktop/test_tasks/test-task-cef/tests/images')

In [58]:
def read_image_from_upload(file: UploadFile) -> np.ndarray:
    """Decode an uploaded file into an OpenCV image array.

    Args:
        file: Upload whose underlying ``file`` object is read to the end.

    Returns:
        The decoded image as returned by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the upload is empty or its bytes cannot be decoded
            as an image.
    """
    content = file.file.read()
    # cv2.imdecode raises a low-level cv2.error on an empty buffer; fail with
    # the same exception type as any other undecodable upload instead.
    if not content:
        raise ValueError("Uploaded file is empty")
    nparr = np.frombuffer(content, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("Cannot decode image from upload")
    return img



In [59]:
def extract_embedding(img: np.ndarray) -> List[float]:
    """Detect the most confident face in ``img`` and return its embedding.

    Uses the module-level ``detector`` network to locate faces and the
    module-level ``embedder`` network to turn the best face crop into a
    feature vector.

    Args:
        img: Image array whose first two dimensions are height and width
            (presumably BGR, as produced by ``cv2.imread`` / ``cv2.imdecode``).

    Returns:
        The flattened embedder output as a list of floats.

    Raises:
        ValueError: If the detector returns no detections, the best detection
            has confidence below 0.5, or the face box lies outside the image.
    """
    h, w = img.shape[:2]
    # The detector takes a 300x300 input with per-channel mean subtraction.
    blob = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (104, 177, 123))
    detector.setInput(blob)
    detections = detector.forward()

    if detections.shape[2] == 0:
        raise ValueError("No face detected")
    # Column 2 holds the confidence; keep only the strongest detection.
    i = int(np.argmax(detections[0, 0, :, 2]))
    conf = float(detections[0, 0, i, 2])
    if conf < 0.5:
        raise ValueError("Low confidence face")

    # Columns 3..6 are box corners relative to the image size.
    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
    x1, y1, x2, y2 = (int(v) for v in box.astype(int))
    # Clamp to the image: the detector may predict a box that extends past the
    # border, and a negative start index would slice from the opposite edge.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    face = img[y1:y2, x1:x2]
    if face.size == 0:
        raise ValueError("Empty face crop")

    # The embedder takes a 96x96 input scaled to [0, 1] with R and B swapped.
    face_blob = cv2.dnn.blobFromImage(face, 1/255, (96, 96), (0, 0, 0), swapRB=True)
    embedder.setInput(face_blob)
    vec = embedder.forward().flatten()
    return vec.tolist()

In [60]:
import itertools

def preload_faces() -> None:
    """Rebuild the global ``faces`` gallery from the images in ``faces_db_path``.

    Every ``*.jpg`` and ``*.png`` file directly inside the directory becomes
    one ``FaceIdentity`` whose ``person_id`` is the file name without its
    extension. The gallery is cleared first, so calling this again reloads it.

    Files that cannot be read as images, or for which ``extract_embedding``
    raises ``ValueError``, are skipped with a warning, so one bad reference
    image does not leave the gallery half-filled.
    """
    db_dir = pathlib.Path(faces_db_path)
    faces.clear()
    for img_path in itertools.chain(db_dir.glob("*.jpg"), db_dir.glob("*.png")):
        image = cv2.imread(str(img_path))
        if image is None:
            logging.warning("Skipping %s: cannot read image", img_path.name)
            continue
        try:
            vec = extract_embedding(image)
        except ValueError as exc:
            logging.warning("Skipping %s: %s", img_path.name, exc)
            continue
        faces.append(FaceIdentity(person_id=img_path.stem, embedding=vec))
            

In [61]:
preload_faces()

In [62]:
faces

[<FaceIdentity Jeff Bezos...>,
 <FaceIdentity Elon Musk...>,
 <FaceIdentity Kadessov Bek...>]

In [63]:
print(len(faces[0].embedding), faces[0].embedding)

128 [0.09934961050748825, 0.04149291291832924, -0.1650935262441635, -0.0003136911545880139, 0.00474608363583684, 0.12053040415048599, -0.0010250459890812635, -0.009359359741210938, -0.06061220169067383, -0.06617451459169388, -0.005281899124383926, 0.11030469834804535, -0.017715172842144966, -0.18636386096477509, -0.14680549502372742, 0.022222885861992836, -0.06705533713102341, -0.041599638760089874, -0.03404179960489273, 0.13302743434906006, 0.06322041898965836, -0.027591286227107048, 0.11977212131023407, 0.00868282187730074, -0.07403411716222763, 0.10103972256183624, 0.0781090259552002, -0.2406017780303955, -0.04960612207651138, 0.02755623124539852, -0.10688519477844238, 0.05227252095937729, 0.010951279662549496, 0.09245490282773972, -0.04439910501241684, -0.0798986628651619, 0.021238133311271667, 0.23190201818943024, -0.03246373310685158, 0.005048909690231085, -0.04185359552502632, -0.07701095193624496, -0.05027305334806442, 0.07801036536693573, -0.10892353951931, 0.05636750906705856

In [None]:
def find_best_match(query_vec: Sequence[float]) -> tuple[Optional[str], float]:
    """Find the identity in ``faces`` closest to ``query_vec``.

    Closeness is the Euclidean distance between ``query_vec`` and each stored
    embedding. No acceptance threshold is applied here; the caller decides
    whether the returned distance is small enough to count as a match.

    Args:
        query_vec: Embedding to look up.

    Returns:
        ``(person_id, distance)`` of the nearest identity, or
        ``(None, inf)`` when ``faces`` is empty. On ties the earliest entry
        in ``faces`` wins.

    Raises:
        ValueError: If ``query_vec`` and a stored embedding differ in length
            (previously the longer vector was silently truncated).
    """
    best_dist = float("inf")
    best_id: Optional[str] = None
    for face in faces:
        # math.dist checks that both vectors have the same dimension.
        dist = math.dist(query_vec, face.embedding)
        if dist < best_dist:
            best_dist = dist
            best_id = face.person_id
    return best_id, best_dist

In [69]:
image = cv2.imread(str(test_faces_db_path / 'Bek.jpg'))
vec = extract_embedding(image)
vec

[0.027442537248134613,
 0.1894277036190033,
 -0.0011587596964091063,
 0.10580896586179733,
 -0.03773779049515724,
 0.09486744552850723,
 0.04390466958284378,
 0.09131211042404175,
 0.007595520466566086,
 0.13407981395721436,
 0.11734998226165771,
 0.027498874813318253,
 0.03243325650691986,
 -0.24171532690525055,
 0.01714860461652279,
 0.004412617534399033,
 0.01240317802876234,
 0.019538521766662598,
 -0.05877901613712311,
 0.08429515361785889,
 0.06997723877429962,
 -0.14103847742080688,
 0.017193503677845,
 0.097173310816288,
 0.05855156481266022,
 -0.07557552307844162,
 -0.031100217252969742,
 -0.15695525705814362,
 0.050338808447122574,
 0.02772817201912403,
 0.022570515051484108,
 0.017025746405124664,
 0.054726894944906235,
 0.10765837132930756,
 -0.05356697365641594,
 -0.06515446305274963,
 0.06806477159261703,
 0.11559437215328217,
 -0.033805541694164276,
 -0.023154711350798607,
 0.03594580292701721,
 -0.1638757437467575,
 -0.016527719795703888,
 0.019621869549155235,
 -0.0794

In [70]:
find_best_match(vec)

('Kadessov Bek', 0.7895003309527409)