# forensicface--A tool for forensic face examination

> An integrated tool to compare faces using state-of-the-art face recognition models and compute Likelihood Ratios 

In [None]:
# | default_exp app


In [None]:
# | export
from nbdev.showdoc import *
from fastcore.utils import *
import onnxruntime
import cv2
import numpy as np
import os.path as osp
from glob import glob
from insightface.app import FaceAnalysis
from insightface.utils import face_align


In [None]:
# | export
class ForensicFace:
    "A (forensic) face comparison tool"

    def __init__(
        self,
        model: str = "sepaelv2",  # model pack name, looked up under ~/.insightface/models
        det_size: int = 320,  # side of the square detector input
        use_gpu: bool = True,
        gpu: int = 0,  # which GPU to use
        magface=False,  # also load the MagFace model and report its embedding/norm
        extended=True,  # also load the 3D-landmark and gender/age modules
    ):
        """
        Load the face detector, the AdaFace recognition model and, optionally, MagFace.

        Raises FileNotFoundError when no `adaface_*.onnx` file exists for `model`
        and RuntimeError when more than one matches.
        """
        self.extended = extended
        # Truthiness (rather than `== True`) matches the `if self.extended:` test
        # applied when faces are processed, so both always agree.
        allowed_modules = ["detection"]
        if self.extended:
            allowed_modules += ["landmark_3d_68", "genderage"]

        self.det_size = (det_size, det_size)

        self.magface = magface

        self.detectmodel = FaceAnalysis(
            name=model,
            allowed_modules=allowed_modules,
            providers=self._execution_providers(use_gpu, gpu),
        )
        self.detectmodel.prepare(ctx_id=gpu if use_gpu else -1, det_size=self.det_size)

        models_dir = osp.join(osp.expanduser("~/.insightface/models"), model)

        ada_pattern = osp.join(models_dir, "adaface", "adaface_*.onnx")
        onnx_rec_model = glob(ada_pattern)
        # Explicit exceptions instead of `assert`: asserts vanish under `python -O`.
        if not onnx_rec_model:
            raise FileNotFoundError(f"No AdaFace model found matching {ada_pattern}")
        if len(onnx_rec_model) > 1:
            raise RuntimeError(
                f"Expected exactly one AdaFace model matching {ada_pattern}, "
                f"found {len(onnx_rec_model)}"
            )
        self.ort_ada = onnxruntime.InferenceSession(
            onnx_rec_model[0],
            providers=self._execution_providers(use_gpu, gpu),
        )

        if self.magface:
            self.ort_mag = onnxruntime.InferenceSession(
                osp.join(models_dir, "magface", "magface_iresnet100.onnx"),
                providers=self._execution_providers(use_gpu, gpu),
            )

    @staticmethod
    def _execution_providers(use_gpu, gpu):
        "Return a fresh ONNX Runtime provider list for the requested device"
        if use_gpu:
            return [("CUDAExecutionProvider", {"device_id": gpu})]
        return ["CPUExecutionProvider"]

    @staticmethod
    def _load_bgr_image(imgpath):
        """
        Return the image to process: read from disk when `imgpath` is a path
        (str or os.PathLike), otherwise a copy of the array that was passed in.

        Raises ValueError when the file cannot be read.
        """
        import os  # local import: the module itself only imports `os.path as osp`

        if isinstance(imgpath, (str, os.PathLike)):
            bgr_img = cv2.imread(os.fspath(imgpath))
            if bgr_img is None:
                # cv2.imread reports a missing/undecodable file by returning None
                raise ValueError(f"Could not read image file: {imgpath}")
            return bgr_img
        return imgpath.copy()

    def _to_input_ada(self, aligned_bgr_img):
        "Scale a 112x112x3 aligned crop from [0, 255] to [-1, 1] and reshape it to (1, 3, 112, 112) float32"
        _aligned_bgr_img = aligned_bgr_img.astype(np.float32)
        _aligned_bgr_img = ((_aligned_bgr_img / 255.0) - 0.5) / 0.5
        return _aligned_bgr_img.transpose(2, 0, 1).reshape(1, 3, 112, 112)

    def _to_input_mag(self, aligned_bgr_img):
        "Scale a 112x112x3 aligned crop from [0, 255] to [0, 1] and reshape it to (1, 3, 112, 112) float32"
        _aligned_bgr_img = aligned_bgr_img.astype(np.float32)
        _aligned_bgr_img = _aligned_bgr_img / 255.0
        return _aligned_bgr_img.transpose(2, 0, 1).reshape(1, 3, 112, 112)

    def get_most_central_face(self, img, faces):
        """
        faces is a sequence of insightface face objects with keypoints (`kps`) and bounding box (`bbox`)

        return: (index, keypoints) of the face whose bounding-box center is closest to the image center

        Raises ValueError when `faces` is None or empty.
        """
        if faces is None or len(faces) == 0:
            raise ValueError("faces must contain at least one detected face")

        # img.shape is (rows, cols, ...) while bbox is (x1, y1, x2, y2), so the
        # image center has to be expressed as (x, y) = (cols // 2, rows // 2).
        height, width = img.shape[:2]
        img_center = np.array([width // 2, height // 2])

        # Distance from each face center to the image center
        dist = []
        for face in faces:
            box = face.bbox.astype("int").flatten()
            face_center = np.array([(box[0] + box[2]) // 2, (box[1] + box[3]) // 2])
            dist.append(np.linalg.norm(img_center - face_center))

        # Index of the closest face (first one on ties)
        idx = int(np.argmin(dist))
        return idx, faces[idx].kps

    def get_larger_face(self, img, faces):
        """
        faces is a sequence of insightface face objects with keypoints (`kps`) and bounding box (`bbox`)

        return: (index, keypoints) of the face with the largest bounding-box area

        Raises ValueError when `faces` is None or empty. `img` is not used.
        """
        if faces is None or len(faces) == 0:
            raise ValueError("faces must contain at least one detected face")

        # Bounding-box area of every face
        areas = []
        for face in faces:
            box = face.bbox.astype("int").flatten()
            areas.append(abs((box[2] - box[0]) * (box[3] - box[1])))

        # Index of the largest face (first one on ties)
        idx = int(np.argmax(areas))
        return idx, faces[idx].kps

    def _describe_face(self, bgr_img, face):
        "Align one detected face, run the recognition model(s) on it and return its result dict"
        kps = face.kps
        bbox = face.bbox.astype("int")
        bgr_aligned_face = face_align.norm_crop(bgr_img, kps)
        # kps[0] and kps[1] are the two eye keypoints
        ipd = np.linalg.norm(kps[0] - kps[1])

        ada_tensor = self._to_input_ada(bgr_aligned_face)
        ada_inputs = {self.ort_ada.get_inputs()[0].name: ada_tensor}
        normalized_embedding, norm = self.ort_ada.run(None, ada_inputs)
        norm = norm.flatten()[0]

        ret = {
            "keypoints": kps,
            "ipd": ipd,
            # the model returns a unit-length embedding plus its norm; store the rescaled vector
            "embedding": normalized_embedding.flatten() * norm,
            "norm": norm,
            "bbox": bbox,
            "aligned_face": cv2.cvtColor(bgr_aligned_face, cv2.COLOR_BGR2RGB),
        }

        if self.extended:
            pitch, yaw, roll = face.pose
            ret.update(
                gender="M" if face.gender == 1 else "F",
                age=face.age,
                pitch=pitch,
                yaw=yaw,
                roll=roll,
            )

        if self.magface:
            # Key the feed by the MagFace session's own input name so it works
            # even if that name differs from the AdaFace one. The tensor is the
            # same [-1, 1]-scaled one used for AdaFace (`_to_input_mag` is not used).
            # NOTE(review): confirm the MagFace export expects [-1, 1] inputs.
            mag_inputs = {self.ort_mag.get_inputs()[0].name: ada_tensor}
            mag_embedding = self.ort_mag.run(None, mag_inputs)[0][0]
            ret.update(
                magface_embedding=mag_embedding,
                magface_norm=np.linalg.norm(mag_embedding),
            )

        return ret

    def process_image_single_face(self, imgpath: str):  # Path to image (or BGR image array) to be processed
        """
        Process the largest face in the image and return a dict with:

        - keypoints: 5 facial points (left eye, right eye, nose tip, left mouth corner and right mouth corner)

        - ipd: interpupillary distance

        - embedding: AdaFace embedding (unit vector multiplied by its norm)

        - norm: norm of the AdaFace embedding

        - bbox: integer bounding box of the face

        - aligned_face: RGB face after alignment using the keypoints as references for affine transform

        - (if extended) gender, age and pitch, yaw, roll angles

        - (if magface) magface_embedding and magface_norm

        Returns an empty dict when no face is detected.
        """
        bgr_img = self._load_bgr_image(imgpath)
        faces = self.detectmodel.get(bgr_img)
        if len(faces) == 0:
            return {}

        idx, _ = self.get_larger_face(bgr_img, faces)
        return self._describe_face(bgr_img, faces[idx])

    def process_image(self, imgpath):
        "Alias of `process_image_single_face`"
        return self.process_image_single_face(imgpath)

    def process_image_multiple_faces(
        self,
        imgpath: str,  # Path to image (or BGR image array) to be processed
    ):
        """
        Process every detected face in the image and return a list of dicts, one per face, with:

        - keypoints: 5 facial points (left eye, right eye, nose tip, left mouth corner and right mouth corner)

        - ipd: interpupillary distance

        - embedding: AdaFace embedding (unit vector multiplied by its norm)

        - norm: norm of the AdaFace embedding

        - bbox: integer bounding box of the face

        - aligned_face: RGB face after alignment using the keypoints as references for affine transform

        - (if extended) gender, age and pitch, yaw, roll angles

        - (if magface) magface_embedding and magface_norm

        Returns an empty list when no face is detected.
        """
        bgr_img = self._load_bgr_image(imgpath)
        faces = self.detectmodel.get(bgr_img)
        return [self._describe_face(bgr_img, face) for face in faces]


In [None]:
# GPU-backed instance with the extended modules (3D landmarks, gender/age) and without MagFace
ff = ForensicFace(use_gpu=True, magface=False, extended=True)


Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'do_copy_in_default_stream': '1', 'arena_extend_strategy': 'kNextPowerOfTwo', 'gpu_external_empty_cache': '0', 'gpu_external_free': '0', 'cudnn_conv_use_max_workspace': '0', 'gpu_mem_limit': '18446744073709551615', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'gpu_external_alloc': '0', 'device_id': '0'}}
find model: /home/rafael/.insightface/models/sepaelv2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'do_copy_in_default_stream': '1', 'arena_extend_strategy': 'kNextPowerOfTwo', 'gpu_external_empty_cache': '0', 'gpu_external_free': '0', 'cudnn_conv_use_max_workspace': '0', 'gpu_mem_limit': '18446744073709551615', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'gpu_external_alloc': '0', 'device_id': '0'}}
find mode

In [None]:
# Process every face detected in the image, then inspect the first result
results = ff.process_image_multiple_faces("tela.png")
results[0].keys(), results[0]["keypoints"], results[0]["bbox"]


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


(dict_keys(['keypoints', 'ipd', 'embedding', 'norm', 'bbox', 'aligned_face', 'gender', 'age', 'pitch', 'yaw', 'roll']),
 array([[471.42743, 418.60498],
        [522.68933, 418.05362],
        [498.82196, 449.08923],
        [479.3499 , 476.44193],
        [514.33453, 476.06885]], dtype=float32),
 array([441, 355, 548, 506]))

In [None]:
# Process a single face: the one with the largest bounding box in the image
results = ff.process_image_single_face("obama.png")
results.keys(), results["keypoints"]


(dict_keys(['keypoints', 'ipd', 'embedding', 'norm', 'bbox', 'aligned_face', 'gender', 'age', 'pitch', 'yaw', 'roll']),
 array([[103.60011, 139.88237],
        [174.2651 , 137.3372 ],
        [140.28094, 187.14757],
        [109.09432, 219.3402 ],
        [173.40782, 217.09576]], dtype=float32))

## Comparação entre duas imagens

In [None]:
# | export
@patch
def compare(self: ForensicFace, img1path: str, img2path: str):
    """
    Return the cosine similarity between the embeddings of the faces found in two images.

    Each image goes through `process_image`, so only one face per image is compared.
    Raises ValueError when no face is detected in either image.
    """
    img1data = self.process_image(img1path)
    # Explicit check instead of `assert`: under `python -O` an assert would be
    # skipped and the failure would surface later as a KeyError on "embedding".
    if len(img1data) == 0:
        raise ValueError("No face detected in the first image")
    img2data = self.process_image(img2path)
    if len(img2data) == 0:
        raise ValueError("No face detected in the second image")
    return np.dot(img1data["embedding"], img2data["embedding"]) / (
        img1data["norm"] * img2data["norm"]
    )


In [None]:
# Similarity score between the faces found in the two images
ff.compare("obama.png", "obama2.png")


0.8555971

## Agregação de embeddings

In [None]:
# | export
@patch
def aggregate_embeddings(self: ForensicFace, embeddings, weights=None):
    """
    Return the weighted average of `embeddings` along axis 0.

    `embeddings` is an array-like with one embedding per row; `weights` is an
    optional array-like with one weight per row (equal weights when omitted).
    Raises ValueError when the number of weights differs from the number of rows.
    """
    # asarray lets callers pass plain lists as well as ndarrays
    embeddings = np.asarray(embeddings)
    if weights is None:
        weights = np.ones(embeddings.shape[0], dtype="int")
    else:
        weights = np.asarray(weights)
    if embeddings.shape[0] != weights.shape[0]:
        raise ValueError(
            f"Got {weights.shape[0]} weights for {embeddings.shape[0]} embeddings"
        )
    return np.average(embeddings, axis=0, weights=weights)


In [None]:
# | export
@patch
def aggregate_from_images(self: ForensicFace, list_of_image_paths):
    """
    Process each image with `process_image` and average the embeddings of the faces found.

    Images in which no face is detected are skipped. Returns the aggregated
    embedding, or an empty list when no image yielded a face.
    """
    results = (self.process_image(imgpath) for imgpath in list_of_image_paths)
    # an empty result dict means no face was detected in that image
    embeddings = [d["embedding"] for d in results if len(d) > 0]
    if len(embeddings) > 0:
        return self.aggregate_embeddings(np.array(embeddings))
    else:
        return []


In [None]:
# Average the embeddings of the faces found in both images into a single vector
aggregated = ff.aggregate_from_images(["obama.png", "obama2.png"])
aggregated.shape


(512,)

## Suporte a MagFace

Para utilizar, instancie o `ForensicFace` com a opção `magface=True`:

``ff = ForensicFace(magface=True)``

Modelo de [MagFace](https://github.com/IrvingMeng/MagFace)

In [None]:
# Re-create the instance with MagFace enabled and compare the MagFace norm of two images
# (presumably a higher- and a lower-quality capture, judging by the variable names)
ff = ForensicFace(det_size=320, use_gpu=True, magface=True)
good = ff.process_image("001_frontal.JPG")
bad = ff.process_image("001_cam1_1.jpg")
good["magface_norm"], bad["magface_norm"]


Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'do_copy_in_default_stream': '1', 'arena_extend_strategy': 'kNextPowerOfTwo', 'gpu_external_empty_cache': '0', 'gpu_external_free': '0', 'cudnn_conv_use_max_workspace': '0', 'gpu_mem_limit': '18446744073709551615', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'gpu_external_alloc': '0', 'device_id': '0'}}
find model: /home/rafael/.insightface/models/sepaelv2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}, 'CUDAExecutionProvider': {'do_copy_in_default_stream': '1', 'arena_extend_strategy': 'kNextPowerOfTwo', 'gpu_external_empty_cache': '0', 'gpu_external_free': '0', 'cudnn_conv_use_max_workspace': '0', 'gpu_mem_limit': '18446744073709551615', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'gpu_external_alloc': '0', 'device_id': '0'}}
find mode

(23.233418, 22.57745)

## Extração de faces de vídeos com margem

In [None]:
# | export


@patch
def _get_extended_bbox(self: ForensicFace, bbox, frame_shape, margin_factor):
    """
    Scale `bbox` (x1, y1, x2, y2) about its center by `margin_factor` and clip it to the frame.

    `frame_shape` is (height, width, ...). Returns [x1, y1, x2, y2] as Python ints.
    """
    x0, y0, x1, y1 = bbox.astype("int")
    frame_h, frame_w = frame_shape[:2]

    # center of the detected box and half-extent of the enlarged one
    center_x = (x0 + x1) / 2
    center_y = (y0 + y1) / 2
    half_w = (x1 - x0) * margin_factor / 2
    half_h = (y1 - y0) * margin_factor / 2

    # truncate to ints, then keep the box inside the frame limits
    left = max(int(center_x - half_w), 0)
    right = min(int(center_x + half_w), int(frame_w))
    top = max(int(center_y - half_h), 0)
    bottom = min(int(center_y + half_h), int(frame_h))
    return [left, top, right, bottom]


@patch
def extract_faces(
    self: ForensicFace,
    video_path: str,  # path to video file
    dest_folder: str = None,  # folder used to save extracted faces. If not provided, a new folder with the video name is created
    every_n_frames: int = 1,  # skip some frames
    margin: float = 2.0,  # margin to add to each face, w.r.t. detected bounding box
    start_from: float = 0.0,  # seconds after video start to begin processing
):
    """
    Detect faces in a video and save each one, with margin, as a PNG in `dest_folder`.

    Only frames whose index is a multiple of `every_n_frames` are processed,
    starting `start_from` seconds into the video. Returns the number of face
    crops written. Raises ValueError when `every_n_frames` is smaller than 1.
    """
    if every_n_frames < 1:
        raise ValueError("every_n_frames must be a positive integer")

    if dest_folder is None:
        dest_folder = os.path.splitext(video_path)[0]

    os.makedirs(dest_folder, exist_ok=True)

    # initialize video stream from file
    vs = cv2.VideoCapture(video_path)
    nfaces = 0
    try:
        fps = vs.get(cv2.CAP_PROP_FPS)
        start_frame = int(fps * start_from)

        # seek to starting frame
        vs.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        current_frame = start_frame
        while True:
            # jump straight to the next multiple of every_n_frames with a single seek
            remainder = current_frame % every_n_frames
            if remainder != 0:
                current_frame += every_n_frames - remainder
                vs.set(cv2.CAP_PROP_POS_FRAMES, current_frame)

            ok, frame = vs.read()
            if not ok:
                break
            current_frame += 1

            faces = self.detectmodel.get(frame)
            for i, face in enumerate(faces):
                outBbox = self._get_extended_bbox(
                    face.bbox, frame.shape, margin_factor=margin
                )
                # export the face (with added margin)
                face_crop = frame[outBbox[1] : outBbox[3], outBbox[0] : outBbox[2]]
                # NOTE(review): the counter was already advanced past the frame just
                # read, so the number in the file name is that frame's counter + 1.
                face_img_path = os.path.join(
                    dest_folder, f"frame_{current_frame:07}_face_{i:02}.png"
                )
                cv2.imwrite(face_img_path, face_crop)
                nfaces += 1
    finally:
        # release the capture even if detection or writing fails
        vs.release()
    return nfaces


In [None]:
# Default configuration: GPU 0, extended attributes enabled, MagFace disabled
ff = ForensicFace()


In [None]:
# Save face crops from every 10th frame of the video, starting at the beginning;
# the call returns the number of crops written
ff.extract_faces(
    video_path="/home/rafael/productionID_3762907.mp4",
    start_from=0,
    every_n_frames=10,
    dest_folder="/home/rafael/video_faces",
)
