diff --git a/models/face_recognition_sface/README.md b/models/face_recognition_sface/README.md index e7b38c7b..10ab8137 100644 --- a/models/face_recognition_sface/README.md +++ b/models/face_recognition_sface/README.md @@ -7,6 +7,16 @@ Note: - [face_recognition_sface_2021sep.onnx](./face_recognition_sface_2021sep.onnx) is converted from the model from https://github.com/zhongyy/SFace thanks to [Chengrui Wang](https://github.com/crywang). - Support 5-landmark warpping for now (2021sep) +Results of accuracy evaluation with [tools/eval](../../tools/eval). + +| Models | Accuracy | +|-------------|----------| +| SFace | 0.9940 | +| SFace quant | 0.9932 | + +\*: 'quant' stands for 'quantized'. + + ## Demo ***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as face detector, which supports 5-landmark detection for now (2021sep). @@ -17,6 +27,7 @@ Run the following command to try the demo: python demo.py --input1 /path/to/image1 --input2 /path/to/image2 ``` + ## License All files in this directory are licensed under [Apache 2.0 License](./LICENSE). diff --git a/tools/eval/README.md b/tools/eval/README.md index c50600b1..fc8cfd44 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -4,6 +4,7 @@ Make sure you have the following packages installed: ```shell pip install tqdm +pip install scikit-learn pip install scipy ``` @@ -14,8 +15,10 @@ python eval.py -m model_name -d dataset_name -dr dataset_root_dir ``` Supported datasets: + - [ImageNet](#imagenet) - [WIDERFace](#widerface) +- [LFW](#lfw) ## ImageNet @@ -94,3 +97,44 @@ Run evaluation with the following command: ```shell python eval.py -m yunet -d widerface -dr /path/to/widerface ``` + +## LFW + +The script is modified based on [evaluation of InsightFace](https://github.com/deepinsight/insightface/blob/f92bf1e48470fdd567e003f196f8ff70461f7a20/src/eval/lfw.py). + +This evaluation uses [YuNet](../../models/face_detection_yunet) as face detector. 
The structure of the face bounding boxes saved in [lfw_face_bboxes.npy](../eval/datasets/lfw_face_bboxes.npy) is shown below. +Each row represents the bounding box of the main face that will be used in each image. + +```shell +[ + [x, y, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm], + ... + [x, y, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm] +] +``` + +`x, y, w, h` are the top-left coordinates, width and height of the face bounding box, `{x, y}_{re, le, nt, rcm, lcm}` stand for the coordinates of right eye, left eye, nose tip, the right corner and left corner of the mouth respectively. Data type of this numpy array is `np.float32`. + + +### Prepare data + +Please visit http://vis-www.cs.umass.edu/lfw to download the LFW [all images](http://vis-www.cs.umass.edu/lfw/lfw.tgz)(needs to be decompressed) and [pairs.txt](http://vis-www.cs.umass.edu/lfw/pairs.txt)(needs to be placed in the `view2` folder). Organize files as follows: + +```shell +$ tree -L 2 /path/to/lfw +. +├── lfw +│   ├── Aaron_Eckhart +│   ├── ... 
+│   └── Zydrunas_Ilgauskas
+└── view2
+    └── pairs.txt
+```
+
+### Evaluation
+
+Run evaluation with the following command:
+
+```shell
+python eval.py -m sface -d lfw -dr /path/to/lfw
+```
\ No newline at end of file
diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py
index deff3daf..d650cd28 100644
--- a/tools/eval/datasets/__init__.py
+++ b/tools/eval/datasets/__init__.py
@@ -1,5 +1,6 @@
 from .imagenet import ImageNet
 from .widerface import WIDERFace
+from .lfw import LFW
 
 class Registery:
     def __init__(self, name):
@@ -15,3 +16,4 @@ def register(self, item):
 DATASETS = Registery("Datasets")
 DATASETS.register(ImageNet)
 DATASETS.register(WIDERFace)
+DATASETS.register(LFW)
\ No newline at end of file
diff --git a/tools/eval/datasets/lfw.py b/tools/eval/datasets/lfw.py
new file mode 100644
index 00000000..c001b3f9
--- /dev/null
+++ b/tools/eval/datasets/lfw.py
@@ -0,0 +1,294 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+
+from sklearn.model_selection import KFold
+from scipy import interpolate
+import sklearn
+import sklearn.preprocessing  # not loaded by a bare "import sklearn"
+from sklearn.decomposition import PCA
+
+import cv2 as cv
+from tqdm import tqdm
+
+
+def calculate_roc(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  nrof_folds=10,
+                  pca=0):
+    """Compute fold-averaged ROC points and the accuracy of every fold.
+
+    The pairs are split with an unshuffled K-fold. In each fold the threshold
+    with the highest accuracy on the training split is selected and then
+    scored on the test split.
+
+    Args:
+        thresholds: 1-D sequence of squared-distance thresholds to sweep.
+        embeddings1: array of shape (n_pairs, dim), first face of each pair.
+        embeddings2: array of shape (n_pairs, dim), second face of each pair.
+        actual_issame: boolean array, True where a pair is the same person.
+        nrof_folds: number of cross-validation folds.
+        pca: if > 0, number of PCA components (fitted on the training split
+            of each fold) the embeddings are projected onto before comparing.
+
+    Returns:
+        Tuple (tpr, fpr, accuracy). tpr and fpr hold one fold-averaged value
+        per threshold; accuracy holds one value per fold.
+    """
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    tprs = np.zeros((nrof_folds, nrof_thresholds))
+    fprs = np.zeros((nrof_folds, nrof_thresholds))
+    accuracy = np.zeros((nrof_folds))
+    indices = np.arange(nrof_pairs)
+
+    if pca == 0:
+        # Without PCA the distances are the same for every fold.
+        diff = np.subtract(embeddings1, embeddings2)
+        dist = np.sum(np.square(diff), 1)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if pca > 0:
+            print('doing pca on', fold_idx)
+            embed1_train = embeddings1[train_set]
+            embed2_train = embeddings2[train_set]
+            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
+            pca_model = PCA(n_components=pca)
+            pca_model.fit(_embed_train)
+            embed1 = pca_model.transform(embeddings1)
+            embed2 = pca_model.transform(embeddings2)
+            embed1 = sklearn.preprocessing.normalize(embed1)
+            embed2 = sklearn.preprocessing.normalize(embed2)
+            diff = np.subtract(embed1, embed2)
+            dist = np.sum(np.square(diff), 1)
+
+        # Find the best threshold for the fold
+        acc_train = np.zeros((nrof_thresholds))
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, _, acc_train[threshold_idx] = calculate_accuracy(
+                threshold, dist[train_set], actual_issame[train_set])
+        best_threshold_index = np.argmax(acc_train)
+        for threshold_idx, threshold in enumerate(thresholds):
+            fold_tpr, fold_fpr, _ = calculate_accuracy(
+                threshold, dist[test_set], actual_issame[test_set])
+            tprs[fold_idx, threshold_idx] = fold_tpr
+            fprs[fold_idx, threshold_idx] = fold_fpr
+        _, _, accuracy[fold_idx] = calculate_accuracy(
+            thresholds[best_threshold_index], dist[test_set],
+            actual_issame[test_set])
+
+    tpr = np.mean(tprs, 0)
+    fpr = np.mean(fprs, 0)
+    return tpr, fpr, accuracy
+
+
+def calculate_accuracy(threshold, dist, actual_issame):
+    """Return (tpr, fpr, accuracy) when pairs with dist < threshold are accepted."""
+    predict_issame = np.less(dist, threshold)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    tn = np.sum(
+        np.logical_and(np.logical_not(predict_issame),
+                       np.logical_not(actual_issame)))
+    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
+
+    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
+    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
+    acc = float(tp + tn) / dist.size
+    return tpr, fpr, acc
+
+
+def calculate_val(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  far_target,
+                  nrof_folds=10):
+    """Estimate the validation rate at a target false accept rate (FAR).
+
+    In each fold the threshold whose training-split FAR equals far_target is
+    found by interpolation and then scored on the test split.
+
+    Returns:
+        Tuple (val_mean, val_std, far_mean) computed over the folds.
+    """
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+
+    diff = np.subtract(embeddings1, embeddings2)
+    dist = np.sum(np.square(diff), 1)
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(
+                threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train) >= far_target:
+            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
+            threshold = f(far_target)
+        else:
+            threshold = 0.0
+
+        val[fold_idx], far[fold_idx] = calculate_val_far(
+            threshold, dist[test_set], actual_issame[test_set])
+
+    val_mean = np.mean(val)
+    far_mean = np.mean(far)
+    val_std = np.std(val)
+    return val_mean, val_std, far_mean
+
+
+def calculate_val_far(threshold, dist, actual_issame):
+    """Return (validation rate, false accept rate) at the given threshold.
+
+    A rate is reported as 0.0 when its denominator is empty (no genuine or no
+    impostor pairs), matching the guards in calculate_accuracy instead of
+    raising ZeroDivisionError.
+    """
+    predict_issame = np.less(dist, threshold)
+    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
+    false_accept = np.sum(
+        np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    n_same = np.sum(actual_issame)
+    n_diff = np.sum(np.logical_not(actual_issame))
+    val = 0.0 if n_same == 0 else float(true_accept) / float(n_same)
+    far = 0.0 if n_diff == 0 else float(false_accept) / float(n_diff)
+    return val, far
+
+
+def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
+    """Run the verification protocol on interleaved pair embeddings.
+
+    Rows 2*i and 2*i + 1 of embeddings form pair i, labelled by
+    actual_issame[i].
+
+    Returns:
+        Tuple (tpr, fpr, accuracy, val, val_std, far).
+    """
+    # Calculate evaluation metrics
+    thresholds = np.arange(0, 4, 0.01)
+    embeddings1 = embeddings[0::2]
+    embeddings2 = embeddings[1::2]
+    tpr, fpr, accuracy = calculate_roc(thresholds,
+                                       embeddings1,
+                                       embeddings2,
+                                       np.asarray(actual_issame),
+                                       nrof_folds=nrof_folds,
+                                       pca=pca)
+    thresholds = np.arange(0, 4, 0.001)
+    val, val_std, far = calculate_val(thresholds,
+                                      embeddings1,
+                                      embeddings2,
+                                      np.asarray(actual_issame),
+                                      1e-3,
+                                      nrof_folds=nrof_folds)
+    return tpr, fpr, accuracy, val, val_std, far
+
+
+class LFW:
+    """LFW face verification benchmark (view 2 pairs)."""
+
+    def __init__(self, root, target_size=250):
+        self.LFW_IMAGE_SIZE = 250
+
+        self.lfw_root = root
+        self.target_size = target_size
+
+        self.lfw_pairs_path = os.path.join(self.lfw_root, 'view2', 'pairs.txt')
+        self.image_path_pattern = os.path.join(self.lfw_root, 'lfw', '{person_name}', '{image_name}')
+
+        self.lfw_image_paths, self.id_list = self.load_pairs()
+
+    @property
+    def name(self):
+        return 'LFW'
+
+    def __len__(self):
+        return len(self.lfw_image_paths)
+
+    @property
+    def ids(self):
+        return self.id_list
+
+    def load_pairs(self):
+        """Parse pairs.txt into image paths (two per pair) and same-person flags."""
+        image_paths = []
+        id_list = []
+        with open(self.lfw_pairs_path, 'r') as f:
+            for line in f.readlines()[1:]:  # the first line is skipped
+                line = line.strip().split()
+                if len(line) == 3:
+                    # "<person> <idx1> <idx2>": two images of one person
+                    person_name = line[0]
+                    image1_name = '{}_{:04d}.jpg'.format(person_name, int(line[1]))
+                    image2_name = '{}_{:04d}.jpg'.format(person_name, int(line[2]))
+                    image_paths += [
+                        self.image_path_pattern.format(person_name=person_name, image_name=image1_name),
+                        self.image_path_pattern.format(person_name=person_name, image_name=image2_name)
+                    ]
+                    id_list.append(True)
+                elif len(line) == 4:
+                    # "<person1> <idx1> <person2> <idx2>": two different people
+                    person1_name = line[0]
+                    image1_name = '{}_{:04d}.jpg'.format(person1_name, int(line[1]))
+                    person2_name = line[2]
+                    image2_name = '{}_{:04d}.jpg'.format(person2_name, int(line[3]))
+                    image_paths += [
+                        self.image_path_pattern.format(person_name=person1_name, image_name=image1_name),
+                        self.image_path_pattern.format(person_name=person2_name, image_name=image2_name)
+                    ]
+                    id_list.append(False)
+        return image_paths, id_list
+
+    def __getitem__(self, key):
+        path = self.lfw_image_paths[key]
+        img = cv.imread(path)
+        if img is None:
+            # cv.imread signals failure by returning None instead of raising.
+            raise IOError('Failed to read image: {}'.format(path))
+        if self.target_size != self.LFW_IMAGE_SIZE:
+            img = cv.resize(img, (self.target_size, self.target_size))
+        return img
+
+    def eval(self, model):
+        """Embed every image with model and compute 10-fold verification metrics."""
+        ids = self.ids
+        embeddings = np.zeros(shape=(len(self), 128))
+        # The boxes ship next to this module; resolve them from __file__ so the
+        # evaluation does not depend on the current working directory.
+        bboxes_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                   'lfw_face_bboxes.npy')
+        face_bboxes = np.load(bboxes_path)
+        progress = tqdm(enumerate(self),
+                        total=len(self),  # enumerate() hides the length from tqdm
+                        desc="Evaluating {} with {} val set".format(model.name, self.name))
+        for idx, img in progress:
+            embedding = model.infer(img, face_bboxes[idx])
+            embeddings[idx] = embedding
+
+        embeddings = sklearn.preprocessing.normalize(embeddings)
+        self.tpr, self.fpr, self.acc, self.val, self.std, self.far = evaluate(embeddings, ids, nrof_folds=10)
+        self.acc, self.std = np.mean(self.acc), np.std(self.acc)
+
+    def print_result(self):
+        print("==================== Results ====================")
+        print("Average Accuracy: {:.4f}".format(self.acc))
+        print("=================================================")
diff --git a/tools/eval/datasets/lfw_face_bboxes.npy b/tools/eval/datasets/lfw_face_bboxes.npy
new file mode 100644
index 00000000..d3988c31
Binary files /dev/null and b/tools/eval/datasets/lfw_face_bboxes.npy differ
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index 2317ae61..16800925 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -64,7 +64,15 @@
         modelPath=os.path.join(root_dir, "models/face_detection_yunet/face_detection_yunet_2022mar-act_int8-wt_int8-quantized.onnx"),
         topK=5000,
         confThreshold=0.3,
-        nmsThreshold=0.45)
+        nmsThreshold=0.45),
+    sface=dict(
+        name="SFace",
+        topic="face_recognition",
+        modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec.onnx")),
+    sface_q=dict(
+        name="SFace",
+        topic="face_recognition",
+        modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec-act_int8-wt_int8-quantized.onnx")),
 )
 
 datasets = dict(
@@ -74,7 +82,11 @@
         size=224),
     widerface=dict(
         name="WIDERFace",
-        topic="face_detection")
+        topic="face_detection"),
+    lfw=dict(
+        name="LFW",
+        topic="face_recognition",
+        target_size=112),
 )
 
 def main(args):