From 5f27fa70045b20b0712079fb8e35294eb4614351 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Wed, 31 Jan 2024 19:09:24 +0000
Subject: [PATCH 1/7] add test coverage support

---
 .gitignore | 2 ++
 Makefile   | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 63ebe2c8..eaa42ce1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ tests/dataset/*.pkl
 tests/*.ipynb
 tests/*.csv
 *.pyc
+**/.coverage
+**/.coverage.*
\ No newline at end of file
diff --git a/Makefile b/Makefile
index af58c7f7..cb8e9aeb 100644
--- a/Makefile
+++ b/Makefile
@@ -2,4 +2,7 @@ test:
 	cd tests && python -m pytest . -s --disable-warnings
 
 lint:
-	python -m pylint deepface/ --fail-under=10
\ No newline at end of file
+	python -m pylint deepface/ --fail-under=10
+
+coverage:
+	pip install pytest-cov && cd tests && python -m pytest --cov=deepface
\ No newline at end of file

From 9fbb229b97f62e357decf95f15f6811b9e07c38e Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Wed, 31 Jan 2024 19:10:51 +0000
Subject: [PATCH 2/7] Sorting bug for RGB restoration

---
 deepface/detectors/MtCnn.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py
index 0330c08b..e1608d1a 100644
--- a/deepface/detectors/MtCnn.py
+++ b/deepface/detectors/MtCnn.py
@@ -1,5 +1,4 @@
 from typing import List
-import cv2
 import numpy as np
 from mtcnn import MTCNN
 from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
@@ -32,7 +31,9 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace
 
         detected_face = None
 
-        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV read BGR
+        # mtcnn expects RGB but OpenCV reads BGR
+        # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img_rgb = img[:, :, ::-1]
         detections = self.model.detect_faces(img_rgb)
 
         if detections is not None and len(detections) > 0:

From 95c55c0401999140ef14776807b193e5eb1ee1cc Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Wed, 31 Jan 2024 19:12:16 +0000
Subject: [PATCH 3/7] moving logic under modules

- all extract_faces logic moved to detection module
- load image related logic moved to preprocessing module
---
 deepface/DeepFace.py               |   1 +
 deepface/commons/functions.py      | 283 -----------------------------
 deepface/modules/demography.py     |  12 +-
 deepface/modules/detection.py      | 140 ++++++++++++--
 deepface/modules/preprocessing.py  | 131 +++++++++++++
 deepface/modules/recognition.py    |  18 +-
 deepface/modules/representation.py |  24 ++-
 deepface/modules/verification.py   |  18 +-
 tests/test_extract_faces.py        |  24 ++-
 9 files changed, 319 insertions(+), 332 deletions(-)
 create mode 100644 deepface/modules/preprocessing.py

diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index c235ac89..7e55de8e 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -449,6 +449,7 @@ def extract_faces(
         enforce_detection=enforce_detection,
         align=align,
         grayscale=grayscale,
+        human_readable=True,
     )
 
 
diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index cdb047d6..cc39756c 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -1,42 +1,19 @@
 import os
-from typing import Union, Tuple, List
-import base64
 from pathlib import Path
 
 # 3rd party dependencies
-from PIL import Image
-import requests
-import numpy as np
-import cv2
 import tensorflow as tf
 
 # package dependencies
-from deepface.detectors import DetectorWrapper
-from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.commons.logger import Logger
 
 logger = Logger(module="commons.functions")
 
-# pylint: disable=no-else-raise
-
-# --------------------------------------------------
-# configurations of dependencies
-
 
 def get_tf_major_version() -> int:
     return int(tf.__version__.split(".", maxsplit=1)[0])
 
 
-tf_major_version = get_tf_major_version()
-
-if tf_major_version == 1:
-    from keras.preprocessing import image
-elif tf_major_version == 2:
-    from tensorflow.keras.preprocessing import image
-
-# --------------------------------------------------
-
-
 def initialize_folder() -> None:
     """Initialize the folder for storing weights and models.
 
@@ -65,266 +42,6 @@ def get_deepface_home() -> str:
         str: the home directory.
     """
     return str(os.getenv("DEEPFACE_HOME", default=str(Path.home())))
-
-
-# --------------------------------------------------
-
-
-def loadBase64Img(uri: str) -> np.ndarray:
-    """Load image from base64 string.
-
-    Args:
-        uri: a base64 string.
-
-    Returns:
-        numpy array: the loaded image.
-    """
-    encoded_data = uri.split(",")[1]
-    nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
-    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-    # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-    return img_bgr
-
-
-def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
-    """
-    Load image from path, url, base64 or numpy array.
-    Args:
-        img: a path, url, base64 or numpy array.
-    Returns:
-        image (numpy array): the loaded image in BGR format
-        image name (str): image name itself
-    """
-
-    # The image is already a numpy array
-    if isinstance(img, np.ndarray):
-        return img, "numpy array"
-
-    if isinstance(img, Path):
-        img = str(img)
-
-    if not isinstance(img, str):
-        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
-
-    # The image is a base64 string
-    if img.startswith("data:image/"):
-        return loadBase64Img(img), "base64 encoded string"
-
-    # The image is a url
-    if img.startswith("http"):
-        return (
-            np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("BGR")),
-            # return url as image name
-            img,
-        )
-
-    # The image is a path
-    if os.path.isfile(img) is not True:
-        raise ValueError(f"Confirm that {img} exists")
-
-    # image must be a file on the system then
-
-    # image name must have english characters
-    if img.isascii() is False:
-        raise ValueError(f"Input image must not have non-english characters - {img}")
-
-    img_obj_bgr = cv2.imread(img)
-    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
-    return img_obj_bgr, img
-
-
-# --------------------------------------------------
-
-
-def extract_faces(
-    img: Union[str, np.ndarray],
-    target_size: tuple = (224, 224),
-    detector_backend: str = "opencv",
-    grayscale: bool = False,
-    enforce_detection: bool = True,
-    align: bool = True,
-) -> List[Tuple[np.ndarray, dict, float]]:
-    """
-    Extract faces from an image.
-    Args:
-        img: a path, url, base64 or numpy array.
-        target_size (tuple, optional): the target size of the extracted faces.
-        Defaults to (224, 224).
-        detector_backend (str, optional): the face detector backend. Defaults to "opencv".
-        grayscale (bool, optional): whether to convert the extracted faces to grayscale.
-        Defaults to False.
-        enforce_detection (bool, optional): whether to enforce face detection. Defaults to True.
-        align (bool, optional): whether to align the extracted faces. Defaults to True.
-
-    Raises:
-        ValueError: if face could not be detected and enforce_detection is True.
-
-    Returns:
-        results (List[Tuple[np.ndarray, dict, float]]): A list of tuples
-            where each tuple contains:
-            - detected_face (np.ndarray): The detected face as a NumPy array.
-            - face_region (dict): The image region represented as
-                {"x": x, "y": y, "w": w, "h": h}
-            - confidence (float): The confidence score associated with the detected face.
-    """
-
-    # this is going to store a list of img itself (numpy), it region and confidence
-    extracted_faces = []
-
-    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
-    img, img_name = load_image(img)
-
-    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
-
-    if detector_backend == "skip":
-        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
-    else:
-        face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)
-
-    # in case of no face found
-    if len(face_objs) == 0 and enforce_detection is True:
-        if img_name is not None:
-            raise ValueError(
-                f"Face could not be detected in {img_name}."
-                "Please confirm that the picture is a face photo "
-                "or consider to set enforce_detection param to False."
-            )
-        else:
-            raise ValueError(
-                "Face could not be detected. Please confirm that the picture is a face photo "
-                "or consider to set enforce_detection param to False."
-            )
-
-    if len(face_objs) == 0 and enforce_detection is False:
-        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
-
-    for face_obj in face_objs:
-        current_img = face_obj.img
-        current_region = face_obj.facial_area
-        confidence = face_obj.confidence
-        if current_img.shape[0] > 0 and current_img.shape[1] > 0:
-            if grayscale is True:
-                current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
-
-            # resize and padding
-            factor_0 = target_size[0] / current_img.shape[0]
-            factor_1 = target_size[1] / current_img.shape[1]
-            factor = min(factor_0, factor_1)
-
-            dsize = (
-                int(current_img.shape[1] * factor),
-                int(current_img.shape[0] * factor),
-            )
-            current_img = cv2.resize(current_img, dsize)
-
-            diff_0 = target_size[0] - current_img.shape[0]
-            diff_1 = target_size[1] - current_img.shape[1]
-            if grayscale is False:
-                # Put the base image in the middle of the padded image
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                        (0, 0),
-                    ),
-                    "constant",
-                )
-            else:
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                    ),
-                    "constant",
-                )
-
-            # double check: if target image is not still the same size with target.
-            if current_img.shape[0:2] != target_size:
-                current_img = cv2.resize(current_img, target_size)
-
-            # normalizing the image pixels
-            # what this line doing? must?
-            img_pixels = image.img_to_array(current_img)
-            img_pixels = np.expand_dims(img_pixels, axis=0)
-            img_pixels /= 255  # normalize input in [0, 1]
-
-            # int cast is for the exception - object of type 'float32' is not JSON serializable
-            region_obj = {
-                "x": current_region.x,
-                "y": current_region.y,
-                "w": current_region.w,
-                "h": current_region.h,
-            }
-
-            extracted_face = (img_pixels, region_obj, confidence)
-            extracted_faces.append(extracted_face)
-
-    if len(extracted_faces) == 0 and enforce_detection == True:
-        raise ValueError(
-            f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
-        )
-
-    return extracted_faces
-
-
-def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
-    """Normalize input image.
-
-    Args:
-        img (numpy array): the input image.
-        normalization (str, optional): the normalization technique. Defaults to "base",
-        for no normalization.
-
-    Returns:
-        numpy array: the normalized image.
-    """
-
-    # issue 131 declares that some normalization techniques improves the accuracy
-
-    if normalization == "base":
-        return img
-
-    # @trevorgribble and @davedgd contributed this feature
-    # restore input in scale of [0, 255] because it was normalized in scale of
-    # [0, 1] in preprocess_face
-    img *= 255
-
-    if normalization == "raw":
-        pass  # return just restored pixels
-
-    elif normalization == "Facenet":
-        mean, std = img.mean(), img.std()
-        img = (img - mean) / std
-
-    elif normalization == "Facenet2018":
-        # simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
-        img /= 127.5
-        img -= 1
-
-    elif normalization == "VGGFace":
-        # mean subtraction based on VGGFace1 training data
-        img[..., 0] -= 93.5940
-        img[..., 1] -= 104.7624
-        img[..., 2] -= 129.1863
-
-    elif normalization == "VGGFace2":
-        # mean subtraction based on VGGFace2 training data
-        img[..., 0] -= 91.4953
-        img[..., 1] -= 103.8827
-        img[..., 2] -= 131.0912
-
-    elif normalization == "ArcFace":
-        # Reference study: The faces are cropped and resized to 112×112,
-        # and each pixel (ranged between [0, 255]) in RGB images is normalised
-        # by subtracting 127.5 then divided by 128.
-        img -= 127.5
-        img /= 128
-    else:
-        raise ValueError(f"unimplemented normalization type - {normalization}")
-
-    return img
-
-
 def find_target_size(model_name: str) -> tuple:
     """Find the target size of the model.
 
diff --git a/deepface/modules/demography.py b/deepface/modules/demography.py
index 38677089..3372b69f 100644
--- a/deepface/modules/demography.py
+++ b/deepface/modules/demography.py
@@ -6,8 +6,7 @@
 from tqdm import tqdm
 
 # project dependencies
-from deepface.modules import modeling
-from deepface.commons import functions
+from deepface.modules import modeling, detection
 from deepface.extendedmodels import Gender, Race, Emotion
 
 
@@ -114,8 +113,8 @@ def analyze(
     # ---------------------------------
     resp_objects = []
 
-    img_objs = functions.extract_faces(
-        img=img_path,
+    img_objs = detection.extract_faces(
+        img_path=img_path,
         target_size=(224, 224),
         detector_backend=detector_backend,
         grayscale=False,
@@ -123,7 +122,10 @@ def analyze(
         align=align,
     )
 
-    for img_content, img_region, img_confidence in img_objs:
+    for img_obj in img_objs:
+        img_content = img_obj["face"]
+        img_region = img_obj["facial_area"]
+        img_confidence = img_obj["confidence"]
         if img_content.shape[0] > 0 and img_content.shape[1] > 0:
             obj = {}
             # facial attribute analysis
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index 7fbacff3..2085d764 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -3,10 +3,26 @@
 
 # 3rd part dependencies
 import numpy as np
+import cv2
 from PIL import Image
 
 # project dependencies
+from deepface.modules import preprocessing
+from deepface.models.Detector import DetectedFace, FacialAreaRegion
+from deepface.detectors import DetectorWrapper
 from deepface.commons import functions
+from deepface.commons.logger import Logger
+
+logger = Logger(module="deepface/modules/detection.py")
+
+# pylint: disable=no-else-raise
+
+
+tf_major_version = functions.get_tf_major_version()
+if tf_major_version == 1:
+    from keras.preprocessing import image
+elif tf_major_version == 2:
+    from tensorflow.keras.preprocessing import image
 
 
 def extract_faces(
@@ -16,6 +32,7 @@ def extract_faces(
     enforce_detection: bool = True,
     align: bool = True,
     grayscale: bool = False,
+    human_readable=False,
 ) -> List[Dict[str, Any]]:
     """
     Extract faces from a given image
@@ -38,6 +55,8 @@ def extract_faces(
 
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).
+        human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
+            or 4D BGR for ML models (default is False).
 
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
@@ -48,27 +67,108 @@ def extract_faces(
 
     resp_objs = []
 
-    img_objs = functions.extract_faces(
-        img=img_path,
-        target_size=target_size,
-        detector_backend=detector_backend,
-        grayscale=grayscale,
-        enforce_detection=enforce_detection,
-        align=align,
-    )
-
-    for img, region, confidence in img_objs:
-        resp_obj = {}
-
+    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
+    img, img_name = preprocessing.load_image(img_path)
+
+    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
+
+    if detector_backend == "skip":
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
+    else:
+        face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)
+
+    # in case of no face found
+    if len(face_objs) == 0 and enforce_detection is True:
+        if img_name is not None:
+            raise ValueError(
+                f"Face could not be detected in {img_name}."
+                "Please confirm that the picture is a face photo "
+                "or consider to set enforce_detection param to False."
+            )
+        else:
+            raise ValueError(
+                "Face could not be detected. Please confirm that the picture is a face photo "
+                "or consider to set enforce_detection param to False."
+            )
+
+    if len(face_objs) == 0 and enforce_detection is False:
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
+
+    for face_obj in face_objs:
+        current_img = face_obj.img
+        current_region = face_obj.facial_area
+        confidence = face_obj.confidence
+
+        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
+            continue
+
+        if grayscale is True:
+            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
+
+        # resize and padding
+        factor_0 = target_size[0] / current_img.shape[0]
+        factor_1 = target_size[1] / current_img.shape[1]
+        factor = min(factor_0, factor_1)
+
+        dsize = (
+            int(current_img.shape[1] * factor),
+            int(current_img.shape[0] * factor),
+        )
+        current_img = cv2.resize(current_img, dsize)
+
+        diff_0 = target_size[0] - current_img.shape[0]
+        diff_1 = target_size[1] - current_img.shape[1]
+        if grayscale is False:
+            # Put the base image in the middle of the padded image
+            current_img = np.pad(
+                current_img,
+                (
+                    (diff_0 // 2, diff_0 - diff_0 // 2),
+                    (diff_1 // 2, diff_1 - diff_1 // 2),
+                    (0, 0),
+                ),
+                "constant",
+            )
+        else:
+            current_img = np.pad(
+                current_img,
+                (
+                    (diff_0 // 2, diff_0 - diff_0 // 2),
+                    (diff_1 // 2, diff_1 - diff_1 // 2),
+                ),
+                "constant",
+            )
+
+        # double check: if target image is not still the same size with target.
+        if current_img.shape[0:2] != target_size:
+            current_img = cv2.resize(current_img, target_size)
+
+        # normalizing the image pixels
+        # img_to_array also casts the pixels to a float32 numpy array
+        img_pixels = image.img_to_array(current_img)
+        img_pixels = np.expand_dims(img_pixels, axis=0)
+        img_pixels /= 255  # normalize input in [0, 1]
 
         # discard expanded dimension
-        if len(img.shape) == 4:
-            img = img[0]
-
-        # bgr to rgb
-        resp_obj["face"] = img[:, :, ::-1]
-        resp_obj["facial_area"] = region
-        resp_obj["confidence"] = confidence
-        resp_objs.append(resp_obj)
+        if human_readable is True and len(img_pixels.shape) == 4:
+            img_pixels = img_pixels[0]
+
+        resp_objs.append(
+            {
+                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
+                "facial_area": {
+                    "x": current_region.x,
+                    "y": current_region.y,
+                    "w": current_region.w,
+                    "h": current_region.h,
+                },
+                "confidence": confidence,
+            }
+        )
+
+    if len(resp_objs) == 0 and enforce_detection == True:
+        raise ValueError(
+            f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
+        )
 
     return resp_objs
diff --git a/deepface/modules/preprocessing.py b/deepface/modules/preprocessing.py
new file mode 100644
index 00000000..aad9b0b4
--- /dev/null
+++ b/deepface/modules/preprocessing.py
@@ -0,0 +1,131 @@
+import os
+from typing import Union, Tuple
+import base64
+from pathlib import Path
+
+# 3rd party
+import numpy as np
+import cv2
+from PIL import Image
+import requests
+
+
+def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
+    """
+    Load image from path, url, base64 or numpy array.
+    Args:
+        img: a path, url, base64 or numpy array.
+    Returns:
+        image (numpy array): the loaded image in BGR format
+        image name (str): image name itself
+    """
+
+    # The image is already a numpy array
+    if isinstance(img, np.ndarray):
+        return img, "numpy array"
+
+    if isinstance(img, Path):
+        img = str(img)
+
+    if not isinstance(img, str):
+        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
+
+    # The image is a base64 string
+    if img.startswith("data:image/"):
+        return load_base64(img), "base64 encoded string"
+
+    # The image is a url - PIL has no "BGR" mode, so load as RGB and reverse the channels
+    if img.startswith("http"):
+        return (
+            np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("RGB"))[:, :, ::-1],
+            # return url as image name
+            img,
+        )
+
+    # The image is a path
+    if os.path.isfile(img) is not True:
+        raise ValueError(f"Confirm that {img} exists")
+
+    # image must be a file on the system then
+
+    # image name must have English characters
+    if img.isascii() is False:
+        raise ValueError(f"Input image must not have non-English characters - {img}")
+
+    img_obj_bgr = cv2.imread(img)
+    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
+    return img_obj_bgr, img
+
+
+def load_base64(uri: str) -> np.ndarray:
+    """Load image from base64 string.
+
+    Args:
+        uri: a base64 string.
+
+    Returns:
+        numpy array: the loaded image.
+    """
+    encoded_data = uri.split(",")[1]
+    nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8)
+    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+    return img_bgr
+
+
+def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
+    """Normalize input image.
+
+    Args:
+        img (numpy array): the input image.
+        normalization (str, optional): the normalization technique. Defaults to "base",
+        for no normalization.
+
+    Returns:
+        numpy array: the normalized image.
+ """ + + # issue 131 declares that some normalization techniques improves the accuracy + + if normalization == "base": + return img + + # @trevorgribble and @davedgd contributed this feature + # restore input in scale of [0, 255] because it was normalized in scale of + # [0, 1] in preprocess_face + img *= 255 + + if normalization == "raw": + pass # return just restored pixels + + elif normalization == "Facenet": + mean, std = img.mean(), img.std() + img = (img - mean) / std + + elif normalization == "Facenet2018": + # simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted) + img /= 127.5 + img -= 1 + + elif normalization == "VGGFace": + # mean subtraction based on VGGFace1 training data + img[..., 0] -= 93.5940 + img[..., 1] -= 104.7624 + img[..., 2] -= 129.1863 + + elif normalization == "VGGFace2": + # mean subtraction based on VGGFace2 training data + img[..., 0] -= 91.4953 + img[..., 1] -= 103.8827 + img[..., 2] -= 131.0912 + + elif normalization == "ArcFace": + # Reference study: The faces are cropped and resized to 112×112, + # and each pixel (ranged between [0, 255]) in RGB images is normalised + # by subtracting 127.5 then divided by 128. + img -= 127.5 + img /= 128 + else: + raise ValueError(f"unimplemented normalization type - {normalization}") + + return img diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index fe9e8143..393b8ff6 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -12,7 +12,7 @@ # project dependencies from deepface.commons import functions, distance as dst from deepface.commons.logger import Logger -from deepface.modules import representation +from deepface.modules import representation, detection logger = Logger(module="deepface/modules/recognition.py") @@ -202,8 +202,8 @@ def find( ) # img path might have more than once face - source_objs = functions.extract_faces( - img=img_path, + source_objs = detection.extract_faces( + img_path=img_path, target_size=target_size, detector_backend=detector_backend, grayscale=False, @@ -213,7 +213,9 @@ def find( resp_obj = [] - for source_img, source_region, _ in source_objs: + for source_obj in source_objs: + source_img = source_obj["face"] + source_region = source_obj["facial_area"] target_embedding_obj = representation.represent( img_path=source_img, model_name=model_name, @@ -333,8 +335,8 @@ def __find_bulk_embeddings( desc="Finding representations", disable=silent, ): - img_objs = functions.extract_faces( - img=employee, + img_objs = detection.extract_faces( + img_path=employee, target_size=target_size, detector_backend=detector_backend, grayscale=False, @@ -342,7 +344,9 @@ def __find_bulk_embeddings( align=align, ) - for img_content, img_region, _ in img_objs: + for img_obj in img_objs: + img_content = img_obj["face"] + img_region = img_obj["facial_area"] embedding_obj = representation.represent( img_path=img_content, model_name=model_name, diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py index 932c3d71..3a389ebb 100644 --- a/deepface/modules/representation.py +++ b/deepface/modules/representation.py @@ -6,7 +6,7 @@ import cv2 # project dependencies -from deepface.modules import modeling +from deepface.modules import modeling, detection, preprocessing from deepface.commons import functions from deepface.models.FacialRecognition import FacialRecognition @@ -63,8 +63,8 @@ def represent( # we have run pre-process in verification. so, this can be skipped if it is coming from verify. 
     target_size = functions.find_target_size(model_name=model_name)
     if detector_backend != "skip":
-        img_objs = functions.extract_faces(
-            img=img_path,
+        img_objs = detection.extract_faces(
+            img_path=img_path,
             target_size=(target_size[1], target_size[0]),
             detector_backend=detector_backend,
             grayscale=False,
@@ -73,7 +73,7 @@ def represent(
         )
     else:  # skip
         # Try load. If load error, will raise exception internal
-        img, _ = functions.load_image(img_path)
+        img, _ = preprocessing.load_image(img_path)
         # --------------------------------
         if len(img.shape) == 4:
             img = img[0]  # e.g. (1, 224, 224, 3) to (224, 224, 3)
@@ -85,13 +85,21 @@ def represent(
             img = (img.astype(np.float32) / 255.0).astype(np.float32)
         # --------------------------------
        # make dummy region and confidence to keep compatibility with `extract_faces`
-        img_region = {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]}
-        img_objs = [(img, img_region, 0)]
+        img_objs = [
+            {
+                "face": img,
+                "facial_area": {"x": 0, "y": 0, "w": img.shape[2], "h": img.shape[1]},
+                "confidence": 0,
+            }
+        ]
 
     # ---------------------------------
 
-    for img, region, confidence in img_objs:
+    for img_obj in img_objs:
+        img = img_obj["face"]
+        region = img_obj["facial_area"]
+        confidence = img_obj["confidence"]
         # custom normalization
-        img = functions.normalize_input(img=img, normalization=normalization)
+        img = preprocessing.normalize_input(img=img, normalization=normalization)
 
         embedding = model.find_embeddings(img)
diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py
index 442e2630..26281fb1 100644
--- a/deepface/modules/verification.py
+++ b/deepface/modules/verification.py
@@ -7,7 +7,7 @@
 
 # project dependencies
 from deepface.commons import functions, distance as dst
-from deepface.modules import representation
+from deepface.modules import representation, detection
 
 
 def verify(
@@ -82,8 +82,8 @@ def verify(
     target_size = functions.find_target_size(model_name=model_name)
 
     # img pairs might have many faces
-    img1_objs = functions.extract_faces(
-        img=img1_path,
+    img1_objs = detection.extract_faces(
+        img_path=img1_path,
         target_size=target_size,
         detector_backend=detector_backend,
         grayscale=False,
@@ -91,8 +91,8 @@ def verify(
         align=align,
     )
 
-    img2_objs = functions.extract_faces(
-        img=img2_path,
+    img2_objs = detection.extract_faces(
+        img_path=img2_path,
         target_size=target_size,
         detector_backend=detector_backend,
         grayscale=False,
@@ -103,8 +103,12 @@ def verify(
     distances = []
     regions = []
     # now we will find the face pair with minimum distance
-    for img1_content, img1_region, _ in img1_objs:
-        for img2_content, img2_region, _ in img2_objs:
+    for img1_obj in img1_objs:
+        img1_content = img1_obj["face"]
+        img1_region = img1_obj["facial_area"]
+        for img2_obj in img2_objs:
+            img2_content = img2_obj["face"]
+            img2_region = img2_obj["facial_area"]
             img1_embedding_obj = representation.represent(
                 img_path=img1_content,
                 model_name=model_name,
diff --git a/tests/test_extract_faces.py b/tests/test_extract_faces.py
index ecdafeca..41bd2a0d 100644
--- a/tests/test_extract_faces.py
+++ b/tests/test_extract_faces.py
@@ -1,12 +1,14 @@
+import numpy as np
+import pytest
 from deepface import DeepFace
 from deepface.commons.logger import Logger
 
 logger = Logger("tests/test_extract_faces.py")
 
+detectors = ["opencv", "mtcnn"]
 
-def test_different_detectors():
-    detectors = ["opencv", "mtcnn"]
 
+def test_different_detectors():
     for detector in detectors:
         img_objs = DeepFace.extract_faces(img_path="dataset/img11.jpg", detector_backend=detector)
         for img_obj in img_objs:
@@ -22,3 +24,21 @@ def test_different_detectors():
         img = img_obj["face"]
         assert img.shape[0] > 0 and img.shape[1] > 0
         logger.info(f"✅ extract_faces for {detector} backend test is done")
+
+
+def test_backends_for_enforced_detection_with_non_facial_inputs():
+    black_img = np.zeros([224, 224, 3])
+    for detector in detectors:
+        with pytest.raises(ValueError):
+            _ = DeepFace.extract_faces(img_path=black_img, detector_backend=detector)
+    logger.info("✅ extract_faces for enforced detection and non-facial image test is done")
+
+
+def test_backends_for_not_enforced_detection_with_non_facial_inputs():
+    black_img = np.zeros([224, 224, 3])
+    for detector in detectors:
+        objs = DeepFace.extract_faces(
+            img_path=black_img, detector_backend=detector, enforce_detection=False
+        )
+        assert objs[0]["face"].shape == (224, 224, 3)
+    logger.info("✅ extract_faces for not enforced detection and non-facial image test is done")

From 4800aa3e8c2c5b3220e0edfb5542e7dd4dda7835 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Wed, 31 Jan 2024 19:37:43 +0000
Subject: [PATCH 4/7] input shape is now retrieved from models

---
 deepface/basemodels/ArcFace.py       |  2 ++
 deepface/basemodels/DeepID.py        |  2 ++
 deepface/basemodels/Dlib.py          |  2 ++
 deepface/basemodels/Facenet.py       |  4 +++
 deepface/basemodels/FbDeepFace.py    |  2 ++
 deepface/basemodels/OpenFace.py      |  2 ++
 deepface/basemodels/SFace.py         |  2 ++
 deepface/basemodels/VGGFace.py       |  2 ++
 deepface/commons/functions.py        | 30 ---------------------
 deepface/models/FacialRecognition.py |  5 +++-
 deepface/modules/realtime.py         | 11 ++++----
 deepface/modules/recognition.py      |  8 +++---
 deepface/modules/representation.py   |  3 +--
 deepface/modules/verification.py     |  8 +++---
 tests/face-recognition-how.py        | 40 +++++++++++++++++++---------
 15 files changed, 66 insertions(+), 57 deletions(-)

diff --git a/deepface/basemodels/ArcFace.py b/deepface/basemodels/ArcFace.py
index bb639f3d..cacacd4f 100644
--- a/deepface/basemodels/ArcFace.py
+++ b/deepface/basemodels/ArcFace.py
@@ -53,6 +53,8 @@ class ArcFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "ArcFace"
+        self.input_shape = (112, 112)
+        self.output_shape = 512
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/DeepID.py b/deepface/basemodels/DeepID.py
index 8b4a7d5b..0933c276 100644
--- a/deepface/basemodels/DeepID.py
+++ b/deepface/basemodels/DeepID.py
@@ -49,6 +49,8 @@ class DeepIdClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "DeepId"
+        self.input_shape = (47, 55)
+        self.output_shape = 160
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/Dlib.py b/deepface/basemodels/Dlib.py
index 6d06ef41..8336ca6f 100644
--- a/deepface/basemodels/Dlib.py
+++ b/deepface/basemodels/Dlib.py
@@ -20,6 +20,8 @@ class DlibClient(FacialRecognition):
     def __init__(self):
         self.model = DlibResNet()
         self.model_name = "Dlib"
+        self.input_shape = (150, 150)
+        self.output_shape = 128
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/Facenet.py b/deepface/basemodels/Facenet.py
index d95da023..31811a3e 100644
--- a/deepface/basemodels/Facenet.py
+++ b/deepface/basemodels/Facenet.py
@@ -53,6 +53,8 @@ class FaceNet128dClient(FacialRecognition):
     def __init__(self):
         self.model = load_facenet128d_model()
         self.model_name = "FaceNet-128d"
+        self.input_shape = (160, 160)
+        self.output_shape = 128
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
@@ -75,6 +77,8 @@ class FaceNet512dClient(FacialRecognition):
     def __init__(self):
         self.model = load_facenet512d_model()
         self.model_name = "FaceNet-512d"
+        self.input_shape = (160, 160)
+        self.output_shape = 512
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/FbDeepFace.py b/deepface/basemodels/FbDeepFace.py
index 075626b8..1a47685e 100644
--- a/deepface/basemodels/FbDeepFace.py
+++ b/deepface/basemodels/FbDeepFace.py
@@ -46,6 +46,8 @@ class DeepFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "DeepFace"
+        self.input_shape = (152, 152)
+        self.output_shape = 4096
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/OpenFace.py b/deepface/basemodels/OpenFace.py
index 38672912..8ee2d95d 100644
--- a/deepface/basemodels/OpenFace.py
+++ b/deepface/basemodels/OpenFace.py
@@ -36,6 +36,8 @@ class OpenFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "OpenFace"
+        self.input_shape = (96, 96)
+        self.output_shape = 128
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/SFace.py b/deepface/basemodels/SFace.py
index ba8b55e5..f8ec1921 100644
--- a/deepface/basemodels/SFace.py
+++ b/deepface/basemodels/SFace.py
@@ -22,6 +22,8 @@ class SFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "SFace"
+        self.input_shape = (112, 112)
+        self.output_shape = 128
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/basemodels/VGGFace.py b/deepface/basemodels/VGGFace.py
index 7f204f43..c4516a8c 100644
--- a/deepface/basemodels/VGGFace.py
+++ b/deepface/basemodels/VGGFace.py
@@ -43,6 +43,8 @@ class VggFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "VGG-Face"
+        self.input_shape = (224, 224)
+        self.output_shape = 4096
 
     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index cc39756c..c7b317ac 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -40,33 +40,3 @@ def get_deepface_home() -> str:
         str: the home directory.
     """
     return str(os.getenv("DEEPFACE_HOME", default=str(Path.home())))
-
-
-def find_target_size(model_name: str) -> tuple:
-    """Find the target size of the model.
-
-    Args:
-        model_name (str): the model name.
-
-    Returns:
-        tuple: the target size.
- """ - - target_sizes = { - "VGG-Face": (224, 224), - "Facenet": (160, 160), - "Facenet512": (160, 160), - "OpenFace": (96, 96), - "DeepFace": (152, 152), - "DeepID": (47, 55), - "Dlib": (150, 150), - "ArcFace": (112, 112), - "SFace": (112, 112), - } - - target_size = target_sizes.get(model_name) - - if target_size == None: - raise ValueError(f"unimplemented model name - {model_name}") - - return target_size diff --git a/deepface/models/FacialRecognition.py b/deepface/models/FacialRecognition.py index b49292c0..c7aff6b4 100644 --- a/deepface/models/FacialRecognition.py +++ b/deepface/models/FacialRecognition.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List +from typing import Any, Union, List, Tuple import numpy as np from deepface.commons import functions @@ -15,6 +15,9 @@ class FacialRecognition(ABC): model: Union[Model, Any] model_name: str + input_shape: Tuple[int, int] + output_shape: int + @abstractmethod def find_embeddings(self, img: np.ndarray) -> List[float]: diff --git a/deepface/modules/realtime.py b/deepface/modules/realtime.py index 3edd5d86..b87377f3 100644 --- a/deepface/modules/realtime.py +++ b/deepface/modules/realtime.py @@ -4,7 +4,7 @@ import pandas as pd import cv2 from deepface import DeepFace -from deepface.commons import functions +from deepface.models.FacialRecognition import FacialRecognition from deepface.commons.logger import Logger logger = Logger(module="commons.realtime") @@ -32,12 +32,13 @@ def analysis( enable_emotion = True enable_age_gender = True # ------------------------ - # find custom values for this input set - target_size = functions.find_target_size(model_name=model_name) - # ------------------------ # build models once to store them in the memory # otherwise, they will be built after cam started and this will cause delays - DeepFace.build_model(model_name=model_name) + model: FacialRecognition = DeepFace.build_model(model_name=model_name) + + # find custom values for this input set + target_size = model.input_shape + logger.info(f"facial recognition model {model_name} is just built") if enable_face_analysis: diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 393b8ff6..f678a2b5 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -10,9 +10,10 @@ from tqdm import tqdm # project dependencies -from deepface.commons import functions, distance as dst +from deepface.commons import distance as dst from deepface.commons.logger import Logger -from deepface.modules import representation, detection +from deepface.modules import representation, detection, modeling +from deepface.models.FacialRecognition import FacialRecognition logger = Logger(module="deepface/modules/recognition.py") @@ -89,7 +90,8 @@ def find( if os.path.isdir(db_path) is not True: raise ValueError("Passed db_path does not exist!") - target_size = functions.find_target_size(model_name=model_name) + model: FacialRecognition = modeling.build_model(model_name) + target_size = model.input_shape # --------------------------------------- diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py index 3a389ebb..2b76835a 100644 --- a/deepface/modules/representation.py +++ b/deepface/modules/representation.py @@ -7,7 +7,6 @@ # project dependencies from deepface.modules import modeling, detection, preprocessing -from deepface.commons import functions from deepface.models.FacialRecognition import FacialRecognition @@ -61,7 +60,7 @@ def represent( # 
--------------------------------- # we have run pre-process in verification. so, this can be skipped if it is coming from verify. - target_size = functions.find_target_size(model_name=model_name) + target_size = model.input_shape if detector_backend != "skip": img_objs = detection.extract_faces( img_path=img_path, diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py index 26281fb1..9310e82b 100644 --- a/deepface/modules/verification.py +++ b/deepface/modules/verification.py @@ -6,8 +6,9 @@ import numpy as np # project dependencies -from deepface.commons import functions, distance as dst -from deepface.modules import representation, detection +from deepface.commons import distance as dst +from deepface.modules import representation, detection, modeling +from deepface.models.FacialRecognition import FacialRecognition def verify( @@ -79,7 +80,8 @@ def verify( tic = time.time() # -------------------------------- - target_size = functions.find_target_size(model_name=model_name) + model: FacialRecognition = modeling.build_model(model_name) + target_size = model.input_shape # img pairs might have many faces img1_objs = detection.extract_faces( diff --git a/tests/face-recognition-how.py b/tests/face-recognition-how.py index 29032370..36ec45ba 100644 --- a/tests/face-recognition-how.py +++ b/tests/face-recognition-how.py @@ -1,7 +1,8 @@ import matplotlib.pyplot as plt import numpy as np from deepface import DeepFace -from deepface.commons import functions +from deepface.commons import distance +from deepface.models.FacialRecognition import FacialRecognition from deepface.commons.logger import Logger logger = Logger() @@ -11,9 +12,9 @@ model_name = "VGG-Face" -model = DeepFace.build_model(model_name=model_name) +model: FacialRecognition = DeepFace.build_model(model_name=model_name) -target_size = functions.find_target_size(model_name) +target_size = model.input_shape logger.info(f"target_size: {target_size}") @@ -22,21 +23,34 @@ img1 = DeepFace.extract_faces(img_path="dataset/img1.jpg", target_size=target_size)[0]["face"] img1 = np.expand_dims(img1, axis=0) # to (1, 224, 224, 3) -img1_representation = model.predict(img1)[0, :] +img1_representation = model.find_embeddings(img1) img2 = DeepFace.extract_faces(img_path="dataset/img3.jpg", target_size=target_size)[0]["face"] img2 = np.expand_dims(img2, axis=0) -img2_representation = model.predict(img2)[0, :] +img2_representation = model.find_embeddings(img2) -# ---------------------------------------------- -# distance between two images +img1_representation = np.array(img1_representation) +img2_representation = np.array(img2_representation) +# ---------------------------------------------- +# distance between two images - euclidean distance formula distance_vector = np.square(img1_representation - img2_representation) -logger.debug(distance_vector) - -distance = np.sqrt(distance_vector.sum()) -logger.info(f"Euclidean distance: {distance}") - +current_distance = np.sqrt(distance_vector.sum()) +logger.info(f"Euclidean distance: {current_distance}") + +threshold = distance.findThreshold(model_name=model_name, distance_metric="euclidean") +logger.info(f"Threshold for {model_name}-euclidean pair is {threshold}") + +if current_distance < threshold: + logger.info( + f"This pair is same person because its distance {current_distance}" + f" is less than threshold {threshold}" + ) +else: + logger.info( + f"This pair is different persons because its distance {current_distance}" + f" is greater than threshold {threshold}" + ) # 
---------------------------------------------- # expand vectors to be shown better in graph @@ -75,7 +89,7 @@ plt.colorbar() ax5 = fig.add_subplot(3, 2, 5) -plt.text(0.35, 0, f"Distance: {distance}") +plt.text(0.35, 0, f"Distance: {current_distance}") plt.axis("off") ax6 = fig.add_subplot(3, 2, 6) From cefb8013bbdda839247d8cdeb39abfc242b5e478 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Wed, 31 Jan 2024 19:44:51 +0000 Subject: [PATCH 5/7] making distance calculation functions camel case --- deepface/commons/distance.py | 6 +++--- deepface/modules/recognition.py | 8 ++++---- deepface/modules/verification.py | 8 ++++---- tests/face-recognition-how.py | 2 +- tests/test_find.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/deepface/commons/distance.py b/deepface/commons/distance.py index 6048b9e4..950a7a23 100644 --- a/deepface/commons/distance.py +++ b/deepface/commons/distance.py @@ -2,7 +2,7 @@ import numpy as np -def findCosineDistance( +def find_cosine_distance( source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list] ) -> np.float64: if isinstance(source_representation, list): @@ -17,7 +17,7 @@ def findCosineDistance( return 1 - (a / (np.sqrt(b) * np.sqrt(c))) -def findEuclideanDistance( +def find_euclidean_distance( source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list] ) -> np.float64: if isinstance(source_representation, list): @@ -38,7 +38,7 @@ def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray: return x / np.sqrt(np.sum(np.multiply(x, x))) -def findThreshold(model_name: str, distance_metric: str) -> float: +def find_threshold(model_name: str, distance_metric: str) -> float: base_threshold = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75} diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index f678a2b5..6c21e350 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -249,11 +249,11 @@ def find( ) if distance_metric == "cosine": - distance = dst.findCosineDistance(source_representation, target_representation) + distance = dst.find_cosine_distance(source_representation, target_representation) elif distance_metric == "euclidean": - distance = dst.findEuclideanDistance(source_representation, target_representation) + distance = dst.find_euclidean_distance(source_representation, target_representation) elif distance_metric == "euclidean_l2": - distance = dst.findEuclideanDistance( + distance = dst.find_euclidean_distance( dst.l2_normalize(source_representation), dst.l2_normalize(target_representation), ) @@ -263,7 +263,7 @@ def find( distances.append(distance) # --------------------------- - target_threshold = threshold or dst.findThreshold(model_name, distance_metric) + target_threshold = threshold or dst.find_threshold(model_name, distance_metric) result_df["threshold"] = target_threshold result_df["distance"] = distances diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py index 9310e82b..ab6c9dd5 100644 --- a/deepface/modules/verification.py +++ b/deepface/modules/verification.py @@ -133,11 +133,11 @@ def verify( img2_representation = img2_embedding_obj[0]["embedding"] if distance_metric == "cosine": - distance = dst.findCosineDistance(img1_representation, img2_representation) + distance = dst.find_cosine_distance(img1_representation, img2_representation) elif distance_metric == "euclidean": - distance = dst.findEuclideanDistance(img1_representation, img2_representation) + 
distance = dst.find_euclidean_distance(img1_representation, img2_representation) elif distance_metric == "euclidean_l2": - distance = dst.findEuclideanDistance( + distance = dst.find_euclidean_distance( dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation) ) else: @@ -147,7 +147,7 @@ def verify( regions.append((img1_region, img2_region)) # ------------------------------- - threshold = dst.findThreshold(model_name, distance_metric) + threshold = dst.find_threshold(model_name, distance_metric) distance = min(distances) # best distance facial_areas = regions[np.argmin(distances)] diff --git a/tests/face-recognition-how.py b/tests/face-recognition-how.py index 36ec45ba..09ad8cf2 100644 --- a/tests/face-recognition-how.py +++ b/tests/face-recognition-how.py @@ -38,7 +38,7 @@ current_distance = np.sqrt(distance_vector.sum()) logger.info(f"Euclidean distance: {current_distance}") -threshold = distance.findThreshold(model_name=model_name, distance_metric="euclidean") +threshold = distance.find_threshold(model_name=model_name, distance_metric="euclidean") logger.info(f"Threshold for {model_name}-euclidean pair is {threshold}") if current_distance < threshold: diff --git a/tests/test_find.py b/tests/test_find.py index 8d9e34f9..1d3faa72 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -6,7 +6,7 @@ logger = Logger("tests/test_find.py") -threshold = distance.findThreshold(model_name="VGG-Face", distance_metric="cosine") +threshold = distance.find_threshold(model_name="VGG-Face", distance_metric="cosine") def test_find_with_exact_path(): From 9494d47e315199cf9cc662d5efb496a8efcf34dd Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Wed, 31 Jan 2024 19:51:58 +0000 Subject: [PATCH 6/7] restoration of deprecated detect face function --- deepface/DeepFace.py | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 7e55de8e..4b395724 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -460,3 +460,49 @@ def cli() -> None: import fire fire.Fire() + + +# deprecated function(s) + + +def detectFace( + img_path: Union[str, np.ndarray], + target_size: tuple = (224, 224), + detector_backend: str = "opencv", + enforce_detection: bool = True, + align: bool = True, +) -> Union[np.ndarray, None]: + """ + Deprecated face detection function. Use extract_faces for same functionality. + + Args: + img_path (str or np.ndarray): Path to the first image. Accepts exact image path + as a string, numpy array (BGR), or base64 encoded images. + + target_size (tuple): final shape of facial image. black pixels will be + added to resize the image (default is (224, 224)). + + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', + 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + + enforce_detection (boolean): If no face is detected in an image, raise an exception. + Set to False to avoid the exception for low-resolution images (default is True). + + align (bool): Flag to enable face alignment (default is True). + + Returns: + img (np.ndarray): detected (and aligned) facial area image as numpy array + """ + logger.warn("Function detectFace is deprecated. 
Use extract_faces instead.") + face_objs = extract_faces( + img_path=img_path, + target_size=target_size, + detector_backend=detector_backend, + enforce_detection=enforce_detection, + align=align, + grayscale=False, + ) + extracted_face = None + if len(face_objs) > 0: + extracted_face = face_objs[0]["face"] + return extracted_face From 96d29ab069346ba8ea0ab6b053b2033c8d899395 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Wed, 31 Jan 2024 23:43:30 +0000 Subject: [PATCH 7/7] adding expand percentage argument for detection --- deepface/DeepFace.py | 20 +++++++++++ deepface/detectors/DetectorWrapper.py | 31 +++++++++++++---- deepface/detectors/Dlib.py | 25 +++++++++++--- deepface/detectors/FastMtCnn.py | 29 +++++++++++++--- deepface/detectors/MediaPipe.py | 29 +++++++++++++--- deepface/detectors/MtCnn.py | 29 +++++++++++++--- deepface/detectors/OpenCv.py | 27 ++++++++++++--- deepface/detectors/RetinaFace.py | 30 ++++++++++++----- deepface/detectors/Ssd.py | 48 +++++++++++++++++---------- deepface/detectors/Yolo.py | 27 ++++++++++++--- deepface/detectors/YuNet.py | 29 +++++++++++++--- deepface/models/Detector.py | 27 ++++++++++----- deepface/modules/demography.py | 4 +++ deepface/modules/detection.py | 10 +++++- deepface/modules/recognition.py | 19 +++++++++-- deepface/modules/representation.py | 4 +++ deepface/modules/verification.py | 5 +++ 17 files changed, 314 insertions(+), 79 deletions(-) diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 4b395724..f3d4f693 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -58,6 +58,7 @@ def verify( distance_metric: str = "cosine", enforce_detection: bool = True, align: bool = True, + expand_percentage: int = 0, normalization: str = "base", ) -> Dict[str, Any]: """ @@ -83,6 +84,8 @@ def verify( align (bool): Flag to enable face alignment (default is True). + expand_percentage (int): expand detected facial area with a percentage (default is 0). + normalization (string): Normalize the input image before feeding it to the model. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base) @@ -119,6 +122,7 @@ def verify( distance_metric=distance_metric, enforce_detection=enforce_detection, align=align, + expand_percentage=expand_percentage, normalization=normalization, ) @@ -129,6 +133,7 @@ def analyze( enforce_detection: bool = True, detector_backend: str = "opencv", align: bool = True, + expand_percentage: int = 0, silent: bool = False, ) -> List[Dict[str, Any]]: """ @@ -152,6 +157,8 @@ def analyze( align (boolean): Perform alignment based on the eye positions (default is True). + expand_percentage (int): expand detected facial area with a percentage (default is 0). + silent (boolean): Suppress or allow some log messages for a quieter analysis process (default is False). @@ -209,6 +216,7 @@ def analyze( enforce_detection=enforce_detection, detector_backend=detector_backend, align=align, + expand_percentage=expand_percentage, silent=silent, ) @@ -221,6 +229,7 @@ def find( enforce_detection: bool = True, detector_backend: str = "opencv", align: bool = True, + expand_percentage: int = 0, threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, @@ -249,6 +258,8 @@ def find( align (boolean): Perform alignment based on the eye positions (default is True). + expand_percentage (int): expand detected facial area with a percentage (default is 0). + threshold (float): Specify a threshold to determine whether a pair represents the same person or different individuals. 
This threshold is used for comparing distances. If left unset, default pre-tuned threshold values will be applied based on the specified @@ -286,6 +297,7 @@ def find( enforce_detection=enforce_detection, detector_backend=detector_backend, align=align, + expand_percentage=expand_percentage, threshold=threshold, normalization=normalization, silent=silent, @@ -298,6 +310,7 @@ def represent( enforce_detection: bool = True, detector_backend: str = "opencv", align: bool = True, + expand_percentage: int = 0, normalization: str = "base", ) -> List[Dict[str, Any]]: """ @@ -320,6 +333,8 @@ def represent( align (boolean): Perform alignment based on the eye positions (default is True). + expand_percentage (int): expand detected facial area with a percentage (default is 0). + normalization (string): Normalize the input image before feeding it to the model. Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base). @@ -346,6 +361,7 @@ def represent( enforce_detection=enforce_detection, detector_backend=detector_backend, align=align, + expand_percentage=expand_percentage, normalization=normalization, ) @@ -409,6 +425,7 @@ def extract_faces( detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, + expand_percentage: int = 0, grayscale: bool = False, ) -> List[Dict[str, Any]]: """ @@ -429,6 +446,8 @@ def extract_faces( align (bool): Flag to enable face alignment (default is True). + expand_percentage (int): expand detected facial area with a percentage (default is 0). + grayscale (boolean): Flag to convert the image to grayscale before processing (default is False). @@ -448,6 +467,7 @@ def extract_faces( detector_backend=detector_backend, enforce_detection=enforce_detection, align=align, + expand_percentage=expand_percentage, grayscale=grayscale, human_readable=True, ) diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py index 54bc7a50..196fac3c 100644 --- a/deepface/detectors/DetectorWrapper.py +++ b/deepface/detectors/DetectorWrapper.py @@ -12,6 +12,9 @@ Yolo, YuNet, ) +from deepface.commons.logger import Logger + +logger = Logger(module="deepface/detectors/DetectorWrapper.py") def build_model(detector_backend: str) -> Any: @@ -52,19 +55,35 @@ def build_model(detector_backend: str) -> Any: return face_detector_obj[detector_backend] -def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> List[DetectedFace]: +def detect_faces( + detector_backend: str, img: np.ndarray, align: bool = True, expand_percentage: int = 0 +) -> List[DetectedFace]: """ Detect face(s) from a given image Args: detector_backend (str): detector name + img (np.ndarray): pre-loaded image - alig (bool): enable or disable alignment after detection + + align (bool): enable or disable alignment after detection + + expand_percentage (int): expand detected facial area with a percentage (default is 0). + Returns: results (List[DetectedFace]): A list of DetectedFace objects where each object contains: - - img (np.ndarray): The detected face as a NumPy array. - - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h - - confidence (float): The confidence score associated with the detected face. + + - img (np.ndarray): The detected face as a NumPy array. + + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + + - confidence (float): The confidence score associated with the detected face. 
""" face_detector: Detector = build_model(detector_backend) - return face_detector.detect_faces(img=img, align=align) + if expand_percentage < 0: + logger.warn( + f"Expand percentage cannot be negative but you set it to {expand_percentage}." + "Overwritten it to 0." + ) + expand_percentage = 0 + return face_detector.detect_faces(img=img, align=align, expand_percentage=expand_percentage) diff --git a/deepface/detectors/Dlib.py b/deepface/detectors/Dlib.py index a64068ac..9a66b06c 100644 --- a/deepface/detectors/Dlib.py +++ b/deepface/detectors/Dlib.py @@ -56,18 +56,27 @@ def build_model(self) -> dict: detector["sp"] = sp return detector - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with dlib + Args: - face_detector (Any): dlib face detector object - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace objects + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. """ # this is not a must dependency. do not import it in the global level. @@ -79,6 +88,12 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace "Please install using 'pip install dlib' " ) from e + if expand_percentage != 0: + logger.warn( + f"You set expand_percentage argument to {expand_percentage}," + "but dlib hog handles detection by itself" + ) + resp = [] sp = self.model["sp"] diff --git a/deepface/detectors/FastMtCnn.py b/deepface/detectors/FastMtCnn.py index a4942ee0..c6fd5229 100644 --- a/deepface/detectors/FastMtCnn.py +++ b/deepface/detectors/FastMtCnn.py @@ -12,17 +12,27 @@ class FastMtCnnClient(Detector): def __init__(self): self.model = self.build_model() - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with mtcnn + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace objects + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -37,7 +47,16 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace for current_detection in zip(*detections): x, y, w, h = xyxy_to_xywh(current_detection[0]) - detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = current_detection[1] diff --git a/deepface/detectors/MediaPipe.py b/deepface/detectors/MediaPipe.py index e9f112e3..fe6b7de0 100644 --- a/deepface/detectors/MediaPipe.py +++ b/deepface/detectors/MediaPipe.py @@ -29,17 +29,27 @@ def build_model(self) -> Any: face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7) return face_detection - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with mediapipe + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace): A list of DetectedFace objects + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -74,7 +84,16 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace # left_ear = (int(landmarks[5].x * img_width), int(landmarks[5].y * img_height)) if x > 0 and y > 0: - detected_face = img[y : y + h, x : x + w] + + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) if align: diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py index e1608d1a..308574f1 100644 --- a/deepface/detectors/MtCnn.py +++ b/deepface/detectors/MtCnn.py @@ -13,17 +13,27 @@ class MtCnnClient(Detector): def __init__(self): self.model = MTCNN() - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with mtcnn + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace objects + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" @@ -40,7 +50,16 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace for current_detection in detections: x, y, w, h = current_detection["box"] - detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = current_detection["confidence"] diff --git a/deepface/detectors/OpenCv.py b/deepface/detectors/OpenCv.py index 353892c0..eafdb071 100644 --- a/deepface/detectors/OpenCv.py +++ b/deepface/detectors/OpenCv.py @@ -25,18 +25,27 @@ def build_model(self): detector["eye_detector"] = self.__build_cascade("haarcascade_eye") return detector - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with opencv + Args: - face_detector (Any): opencv face detector object - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -56,7 +65,15 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace if len(faces) > 0: for (x, y, w, h), confidence in zip(faces, scores): - detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] if align: left_eye, right_eye = self.find_eyes(img=detected_face) diff --git a/deepface/detectors/RetinaFace.py b/deepface/detectors/RetinaFace.py index 16b30c07..632d5e3f 100644 --- a/deepface/detectors/RetinaFace.py +++ b/deepface/detectors/RetinaFace.py @@ -9,17 +9,27 @@ class RetinaFaceClient(Detector): def __init__(self): self.model = rf.build_model() - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with retinaface + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace object + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -38,10 +48,14 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = identity["score"] - # detected_face = img[int(y):int(y+h), int(x):int(x+w)] #opencv - detected_face = img[ - facial_area[1] : facial_area[3], facial_area[0] : facial_area[2] - ] + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] if align: landmarks = identity["landmarks"] diff --git a/deepface/detectors/Ssd.py b/deepface/detectors/Ssd.py index 8775b517..0c6ee9c1 100644 --- a/deepface/detectors/Ssd.py +++ b/deepface/detectors/Ssd.py @@ -71,17 +71,27 @@ def build_model(self) -> dict: return detector - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with ssd + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace object + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -92,16 +102,14 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace target_size = (300, 300) - base_img = img.copy() # we will restore base_img to img later - original_size = img.shape - img = cv2.resize(img, target_size) + current_img = cv2.resize(img, target_size) aspect_ratio_x = original_size[1] / target_size[1] aspect_ratio_y = original_size[0] / target_size[0] - imageBlob = cv2.dnn.blobFromImage(image=img) + imageBlob = cv2.dnn.blobFromImage(image=current_img) face_detector = self.model["face_detector"] face_detector.setInput(imageBlob) @@ -126,17 +134,21 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace bottom = instance["bottom"] top = instance["top"] - detected_face = base_img[ - int(top * aspect_ratio_y) : int(bottom * aspect_ratio_y), - int(left * aspect_ratio_x) : int(right * aspect_ratio_x), - ] + x = int(left * aspect_ratio_x) + y = int(top * aspect_ratio_y) + w = int(right * aspect_ratio_x) - int(left * aspect_ratio_x) + h = int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y) - face_region = FacialAreaRegion( - x=int(left * aspect_ratio_x), - y=int(top * aspect_ratio_y), - w=int(right * aspect_ratio_x) - int(left * aspect_ratio_x), - h=int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y), - ) + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] + + face_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = instance["confidence"] diff --git a/deepface/detectors/Yolo.py b/deepface/detectors/Yolo.py index 79e1eaa7..e23d3c17 100644 --- a/deepface/detectors/Yolo.py +++ b/deepface/detectors/Yolo.py @@ -51,18 +51,27 @@ def build_model(self) -> Any: # Return face_detector return YOLO(weight_path) - def detect_faces(self, img: np.ndarray, align: bool = False) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = False, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with yolo + Args: - face_detector (Any): yolo face detector object - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -78,7 +87,15 @@ def detect_faces(self, img: np.ndarray, align: bool = False) -> List[DetectedFac x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h) region = FacialAreaRegion(x=x, y=y, w=w, h=h) - detected_face = img[y : y + h, x : x + w].copy() + + # expand the facial area to be extracted and stay within img.shape limits + x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left + y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top + w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right + h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom + + # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] + detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)] if align: # Tuple of x,y and confidence for left eye diff --git a/deepface/detectors/YuNet.py b/deepface/detectors/YuNet.py index d079428f..f7728b6d 100644 --- a/deepface/detectors/YuNet.py +++ b/deepface/detectors/YuNet.py @@ -49,17 +49,27 @@ def build_model(self) -> Any: ) from err return face_detector - def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: + def detect_faces( + self, img: np.ndarray, align: bool = True, expand_percentage: int = 0 + ) -> List[DetectedFace]: """ Detect and align face with yunet + Args: - img (np.ndarray): pre-loaded image - align (bool): default is true + img (np.ndarray): pre-loaded image as numpy array + + align (bool): flag to enable or disable alignment after detection (default is True) + + expand_percentage (int): expand detected facial area with a percentage + Returns: - results (List[DetectedFace]): A list of DetectedFace objects + results (List[Tuple[DetectedFace]): A list of DetectedFace objects where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. """ # FaceDetector.detect_faces does not support score_threshold parameter. 
@@ -115,7 +125,16 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace
             )
             confidence = face[-1]
             confidence = f"{confidence:.2f}"
-            detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+
+            # expand the facial area to be extracted and stay within img.shape limits
+            x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+            y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+            w2 = min(img.shape[1] - x2, w + 2 * int((w * expand_percentage) / 100))  # expand right
+            h2 = min(img.shape[0] - y2, h + 2 * int((h * expand_percentage) / 100))  # expand bottom
+
+            # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+            detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
+
             img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
             if align:
                 detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))
diff --git a/deepface/models/Detector.py b/deepface/models/Detector.py
index 52d562a0..bd369d83 100644
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@@ -8,19 +8,28 @@
 # pylint: disable=unnecessary-pass, too-few-public-methods
 class Detector(ABC):
     @abstractmethod
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List["DetectedFace"]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List["DetectedFace"]:
         """
-        Detect faces from a given image
+        Interface to detect and align faces
+
         Args:
-            img (np.ndarray): pre-loaded image as a NumPy array
-            align (bool): enable or disable alignment after face detection
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace object
+            results (List[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
-            - face (np.ndarray): The detected face as a NumPy array.
-            - face_region (List[float]): The image region represented as
-                a list of floats e.g. [x, y, w, h]
-            - confidence (float): The confidence score associated with the detected face.
+
+            - img (np.ndarray): The detected face as a NumPy array.
+
+            - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
+            - confidence (float): The confidence score associated with the detected face.
         """
         pass
diff --git a/deepface/modules/demography.py b/deepface/modules/demography.py
index 3372b69f..9f4516a2 100644
--- a/deepface/modules/demography.py
+++ b/deepface/modules/demography.py
@@ -16,6 +16,7 @@ def analyze(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     silent: bool = False,
 ) -> List[Dict[str, Any]]:
     """
@@ -40,6 +41,8 @@ def analyze(
         align (boolean): Perform alignment based on the eye positions (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         silent (boolean): Suppress or allow some log messages for a quieter analysis
             process (default is False).
@@ -120,6 +123,7 @@ def analyze(
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     for img_obj in img_objs:
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index 2085d764..97763b34 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -31,6 +31,7 @@ def extract_faces(
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     grayscale: bool = False,
     human_readable=False,
 ) -> List[Dict[str, Any]]:
@@ -52,6 +53,8 @@ def extract_faces(
         align (bool): Flag to enable face alignment (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         grayscale (boolean): Flag to convert the image to grayscale before processing
             (default is False).
@@ -75,7 +78,12 @@ def extract_faces(
     if detector_backend == "skip":
         face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
     else:
-        face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)
+        face_objs = DetectorWrapper.detect_faces(
+            detector_backend=detector_backend,
+            img=img,
+            align=align,
+            expand_percentage=expand_percentage,
+        )

     # in case of no face found
     if len(face_objs) == 0 and enforce_detection is True:
diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py
index 6c21e350..04fbb673 100644
--- a/deepface/modules/recognition.py
+++ b/deepface/modules/recognition.py
@@ -26,6 +26,7 @@ def find(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     threshold: Optional[float] = None,
     normalization: str = "base",
     silent: bool = False,
@@ -55,6 +56,8 @@ def find(
         align (boolean): Perform alignment based on the eye positions.

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         threshold (float): Specify a threshold to determine whether a pair represents the same
             person or different individuals. This threshold is used for comparing distances.
             If left unset, default pre-tuned threshold values will be applied based on the
             specified
@@ -211,6 +214,7 @@ def find(
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     resp_obj = []
@@ -309,6 +313,7 @@ def __find_bulk_embeddings(
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
     silent: bool = False,
 ):
@@ -317,15 +322,24 @@ def __find_bulk_embeddings(
     Args:
         employees (list): list of exact image paths
+
         model_name (str): facial recognition model name
-        target_size (tuple): expected input shape of facial
-        recognition model
+
+        target_size (tuple): expected input shape of facial recognition model
+
         detector_backend (str): face detector model name
+
         enforce_detection (bool): set this to False if you
             want to proceed when you cannot detect any face
+
         align (bool): enable or disable alignment of image
             before feeding to facial recognition model
+
+        expand_percentage (int): expand detected facial area with a
+        percentage (default is 0).
+        normalization (str): normalization technique
+
+        silent (bool): enable or disable informative logging
     Returns:
         representations (list): pivot list of embeddings with
@@ -344,6 +358,7 @@ def __find_bulk_embeddings(
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     for img_obj in img_objs:
diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py
index 2b76835a..00e65a1e 100644
--- a/deepface/modules/representation.py
+++ b/deepface/modules/representation.py
@@ -16,6 +16,7 @@ def represent(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> List[Dict[str, Any]]:
     """
@@ -37,6 +38,8 @@ def represent(
         align (boolean): Perform alignment based on the eye positions.

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         normalization (string): Normalize the input image before feeding it to the model.
             Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
             (default is base).
@@ -69,6 +72,7 @@ def represent(
             grayscale=False,
             enforce_detection=enforce_detection,
             align=align,
+            expand_percentage=expand_percentage,
         )
     else:  # skip
         # Try load. If load error, will raise exception internal
diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py
index ab6c9dd5..e014ed39 100644
--- a/deepface/modules/verification.py
+++ b/deepface/modules/verification.py
@@ -19,6 +19,7 @@ def verify(
     distance_metric: str = "cosine",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> Dict[str, Any]:
     """
@@ -49,6 +50,8 @@ def verify(
         align (bool): Flag to enable face alignment (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         normalization (string): Normalize the input image before feeding it to the model.
             Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
@@ -91,6 +94,7 @@ def verify(
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     img2_objs = detection.extract_faces(
@@ -100,6 +104,7 @@ def verify(
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     # --------------------------------
     distances = []
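Note on the new expand_percentage argument: the expand-and-clamp block repeated across the
detector backends is easiest to sanity-check in isolation. The sketch below reproduces that
arithmetic with an illustrative 640x480 image, a hypothetical detection box, and a 20%
expansion; the box coordinates and the worked numbers in the comments are assumptions for
illustration, not output from the library.

    import numpy as np

    img = np.zeros((480, 640, 3), dtype=np.uint8)  # H x W x C, as OpenCV loads images
    x, y, w, h = 100, 80, 120, 150                 # hypothetical detected facial area
    expand_percentage = 20

    # same arithmetic as the patched detectors: shift the top-left corner out by the
    # margin, grow the crop on both axes, then clamp everything to the image bounds
    x2 = max(0, x - int((w * expand_percentage) / 100))                      # 100 - 24 = 76
    y2 = max(0, y - int((h * expand_percentage) / 100))                      # 80 - 30 = 50
    w2 = min(img.shape[1] - x2, w + 2 * int((w * expand_percentage) / 100))  # 120 + 48 = 168
    h2 = min(img.shape[0] - y2, h + 2 * int((h * expand_percentage) / 100))  # 150 + 60 = 210

    detected_face = img[y2 : y2 + h2, x2 : x2 + w2]
    assert detected_face.shape[:2] == (210, 168)

From the caller's side the argument simply flows through the public API; a usage sketch
(the image path below is a placeholder):

    from deepface import DeepFace

    face_objs = DeepFace.extract_faces(
        img_path="img.jpg",         # placeholder path
        detector_backend="opencv",
        expand_percentage=20,       # crop 20% beyond the detected box, clamped to the image
    )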