In [1]:
import os
import sys
import cv2
import glob
import joblib
from pathlib import Path
import numpy as np
from time import time
from queue import Queue
from collections import namedtuple

sys.path.append('/home/huy/capstone/godofeye/lib')
sys.path.append('/home/huy/capstone/godofeye/lib/yoloface')

from blueeyes.face_recognition import FaceRecognition, FeatureExtractor, ModelTraining
from blueeyes.face_detection import FaceDetector
from blueeyes.utils import Camera

DEBUG:tensorflow:Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.


Set config.gpu_options.allow_growth to True


### Face Crop from Images (Optional)

from pathlib import Path

IMAGES_DIR = '/home/huy/data/face_recog/train_test_raw/'
OUTPUT_DIR = '/home/huy/data/face_recog/train_test'

detector = FaceDetector('mtcnn', min_face_size=50)

count = 0

for img_path in glob.glob(IMAGES_DIR + '/**/*.jpg', recursive=True):
    path = Path(img_path)
    id = path.parent.name
    im = cv2.imread(str(path), 1)
    boxes = detector.detect(im)
    for left,top,right,bottom in boxes:
        crop = im[top:bottom,left:right,:]
        output_dir = OUTPUT_DIR + f'/{id}'
        output_path = output_dir + f'/{count}.jpg'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        cv2.imwrite(output_path, crop)
        print('Write to ', output_path)
        count += 1

### Create Train Test Set

In [2]:
from pathlib import Path

train_set_dict = {}
test_set_dict = {}

# TRAINSET_LOCATION = '/home/huy/Downloads/CBGVDataset_v3.2/Aug3/*/WM/*.jpg'
TESTSET_LOCATION = '/home/huy/smartbuilding/face_recog_models/dataset/CBGVDataset_v2/*/WM/test/*.jpg'
TRAINSET_LOCATION='/home/huy/Downloads/Aug6(train_bri_dark)'


for path in glob.glob(TRAINSET_LOCATION):
    path = Path(path)
    id = path.parent.parent.parent.name
    if id not in train_set_dict.keys():
        train_set_dict[id] = []
    train_set_dict[id].append(str(path)) 
for path in glob.glob(TESTSET_LOCATION):
    path = Path(path)
    id = path.parent.parent.parent.name
    if id not in test_set_dict.keys():
        test_set_dict[id] = []
    test_set_dict[id].append(str(path))

In [6]:
# auto split train test
from pathlib import Path

RATIO = 1.0

all_set_dict = {}
train_set_dict = {}
test_set_dict = {}

# TRAINSET_LOCATION = '/home/huy/Downloads/Aug5(train+WM)/Aug5/*/train/*.jpg'
# TRAINSET_LOCATION='/home/huy/Downloads/Aug6(train_bri_dark)/*/*.jpg'
TRAINSET_LOCATION = '/home/huy/Downloads/DataFaceID/DATASystem/*/*.jpg'

for path in glob.glob(TRAINSET_LOCATION, recursive=True):
    path = Path(path)
    id = path.parent.name
    if id not in all_set_dict.keys():
        all_set_dict[id] = []
    all_set_dict[id].append(str(path)) 

for label, paths in all_set_dict.items():
    n = int(len(paths)*RATIO)
    train_set_dict[label] = paths[0:n]
    test_set_dict[label] = paths[n:]

In [7]:
train_set_dict.keys()

dict_keys(['10329_bright', '10229_bright', '10225_bright', '11248', '10354', '11074', '11109_bright', '11500_bright', '10542_bright', '11226_bright', '11243_bright', '11321_bright', '11458_bright', '1641764600_dark', '193019_bright', '10872_dark', '10398_dark', '11072_bright', '11216_bright', '10349_bright', '11499_bright', '11100_dark', '11458_dark', '11102_dark', '11249_dark', '10222_bright', '10635', '10887_bright', '10220_bright', '10888_dark', '9944005072_dark', '11410', '10125_bright', '10473', '10229_dark', '10766_dark', '10307_bright', '10576_bright', '10609_bright', '11272', '11499_dark', '10766_bright', '11280_bright', '11118_dark', '10600_bright', '11409_bright', '11409_dark', '10225_dark', '11489_bright', '10542_dark', '11109_dark', '8493773300_dark', '11280_dark', '10358_dark', '11249_bright', '11245_bright', '11160', '11070_bright', '10609_dark', '11184_bright', '11321_dark', '11281_dark', '11185', '10087_dark', '11201', '10402_bright', '11456_dark', '10358_bright', '1086

### Create train and test data

In [9]:
feature_extractor = FeatureExtractor('dlib')
model_trainer = ModelTraining(feature_extractor=feature_extractor)

In [None]:
model_trainer.create_train_set(train_set_dict, output_model_location='/home/huy/face_recog/encoded_data/aug6_train', process=4)

In [None]:
model_trainer.create_train_set(test_set_dict, output_model_location='/home/huy/face_recog/encoded_data/test')

In [28]:
# LOCATION = '/home/huy/Downloads/Aug5(train+WM)/Aug5/*/WM/*.jpg'
LOCATION = '/home/huy/Downloads/CBGVDataset_v3.3(train+test)/*/WM/*.jpg'
data_dict = {}
for path in glob.glob(LOCATION):
    path = Path(path)
    id = path.parent.parent.name
    if id not in data_dict.keys():
        data_dict[id] = []
    data_dict[id].append(str(path))

In [29]:
data_dict.keys()

dict_keys(['11102', '10222', '11109', '11100', '11468', '11248', '10354', '11074', '1641764600', '10542', '10225', '10635', '11410', '10766', '11321', '10887', '11281', '10888', '10473', '10600', '11409', '10576', '11272', '10872', '9944005072', '11280', '10349', '11458', '11214', '10402', '10272', '10125', '10276', '11499', '11500', '11160', '11243', '10398', '11185', '11201', '10307', '11226', '10861', '11317', '11193', '10533', '10437', '10329', '10358', '10087', '11118', '11249', '10250', '11072', '10283', '11088', '11318', '193019', '11273', '10423', '11251', '11070', '10359', '11216', '11184', '1149822', '9563444161', '11267', '10234', '11456', '11489', '10171', '10824', '10220', '11245', '10966', '11455', '10609', '10229', '8493773300'])

In [None]:
model_trainer.create_train_set(data_dict, output_model_location='/home/huy/face_recog/encoded_data/aug5_test_wm', process=6)

### Train model

In [16]:
data = joblib.load('/home/huy/face_recog/encoded_data/aug6_train/features.joblib')
features, labels = list(zip(*data))

In [5]:
data = joblib.load('/home/huy/capstone/godofeye/demo/crop_dataset.joblib')
features, labels = data

In [6]:
model_trainer.train_knn(features, labels, K=300, weights='uniform', output_model_location='/home/huy/face_recog/models/knn')

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=300, p=2,
                     weights='uniform')


In [9]:
model = model_trainer.train_simple_model(features, labels, output_model_location='/home/huy/face_recog/models/simple_distance_test')

In [11]:
joblib.dump(model, 'centroid.joblib')

['centroid.joblib']

### Train SVM SVC

In [10]:
import joblib
import numpy as np
from sklearn import svm
from time import time
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from datetime import datetime
C = 100
kernel = 'rbf'
gamma = 10
train_start = time()
clf = svm.SVC(kernel=kernel, C=C, gamma=gamma, probability=True, verbose=True)
# clf = OneVsRestClassifier(svm_clf, n_jobs=-1)
clf.fit(features, labels)
print('Training time: ', time() - train_start)
date = datetime.now().strftime('%d%m%Y_%H%M%S')
joblib.dump(clf, f'/home/huy/face_recog/models/svm/{kernel}_c{C}_g{gamma}_{date}.svm')

[LibSVM]Training time:  1075.9252300262451


['/home/huy/face_recog/models/svm/rbf_c100_g10_04072020_174310.svm']

In [None]:
svm_trained_model_path = f'/home/huy/face_recog/models/svm/{kernel}_c{C}_g{gamma}_{date}.svm'

### Train SGDClassifier

In [None]:
import joblib
import numpy as np
from sklearn.linear_model import SGDClassifier
from time import time
from datetime import datetime

train_start = time()
loss = 'log'
clf = SGDClassifier(loss=loss, max_iter=1000, tol=1e-3, verbose=True)
# clf = OneVsRestClassifier(svm_clf, n_jobs=-1)
clf.fit(features, labels)
print('Training time: ', time() - train_start)
date = datetime.now().strftime('%d%m%Y_%H%M%S')
sgd_trained_model_path = f'/home/huy/face_recog/models/sgd/loss{loss}_{date}.sgd'
joblib.dump(clf, sgd_trained_model_path)

In [23]:
import joblib
import numpy as np
from time import time
from datetime import datetime
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier

train_start = time()

rbf_feature = RBFSampler(gamma=1, random_state=1)
gaussian_features = rbf_feature.fit_transform(features)
loss = 'log'
clf = SGDClassifier(loss=loss, max_iter=1000, tol=1e-3, verbose=True)
# clf = OneVsRestClassifier(svm_clf, n_jobs=-1)
clf.fit(gaussian_features, labels)
print('Training time: ', time() - train_start)
date = datetime.now().strftime('%d%m%Y_%H%M%S')
sgd_trained_model_path = f'/home/huy/face_recog/models/sgd/RBFSampler_loss{loss}_{date}.sgd'
joblib.dump(clf, sgd_trained_model_path)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


-- Epoch 1
Norm: 13.22, NNZs: 100, Bias: -6.557617, T: 98700, Avg. loss: 0.014819
Total training time: 0.04 seconds.
-- Epoch 2
Norm: 13.27, NNZs: 100, Bias: -6.506358, T: 197400, Avg. loss: 0.013875
Total training time: 0.07 seconds.
-- Epoch 3
Norm: 13.25, NNZs: 100, Bias: -6.430628, T: 296100, Avg. loss: 0.013703
Total training time: 0.11 seconds.
-- Epoch 4
Norm: 13.29, NNZs: 100, Bias: -6.332976, T: 394800, Avg. loss: 0.013778
Total training time: 0.15 seconds.
-- Epoch 5
Norm: 13.26, NNZs: 100, Bias: -6.464987, T: 493500, Avg. loss: 0.013692
Total training time: 0.19 seconds.
-- Epoch 6
Norm: 13.29, NNZs: 100, Bias: -6.381207, T: 592200, Avg. loss: 0.013751
Total training time: 0.24 seconds.
Convergence after 6 epochs took 0.24 seconds
-- Epoch 1
Norm: 12.84, NNZs: 100, Bias: -4.757122, T: 98700, Avg. loss: 0.015769
Total training time: 0.04 seconds.
-- Epoch 2
Norm: 12.87, NNZs: 100, Bias: -4.739954, T: 197400, Avg. loss: 0.014679
Total training time: 0.08 seconds.
-- Epoch 3
No

[Parallel(n_jobs=1)]: Done 141 out of 141 | elapsed:   43.5s finished


['/home/huy/face_recog/models/sgd/RBFSampler_losslog_04072020_181307.sgd']

### Train MLPerception

In [None]:
from sklearn.neural_network import MLPClassifier


### Evaluate the model

#### Eclid Distance

In [None]:
import multiprocessing
from functools import partial
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score

%matplotlib inline
import logging
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
logging.getLogger('matplotlib.font_manager').disabled = True
logging.getLogger('matplotlib.colorbar').disabled = True

In [12]:
def run_evaluation(samples, test_set_type, threshold=0.0, method=[True, True, True], **kwargs):
    ### Euclid ###
    if method[0]:
        plt.clf()
        recog = FaceRecognition(
            feature_extractor_type='dlib',
            classifier_method='euclid',
            model_dir='/home/huy/face_recog/models/simple_distance/'
        )

        classes = [s.replace('\n', '') for s in recog.classes]
        classes.append('unknown')

        actual = []
        predict = []

        def process(samples):
            actual = []
            predict = []
            with tqdm(total=len(samples)) as pbar:
                for feature, label in samples:
                    predict_id = recog.recog([feature], threshold=1-threshold)
                    predict_id = predict_id[0][0].split('\n')[0]
                    actual.append(label)
                    predict.append(predict_id)
                    pbar.update(1)
            return (actual, predict)

        # samples_batch = split_into(samples, 6)

        # with multiprocessing.Pool(6) as pool:
        #     return_value = pool.map(process, samples_batch)
        #     r1, r2 = list(zip(*return_value))
        #     actual.extend(r1)
        #     predict.extend(r2)
        actual, predict = process(samples)

        cfm = confusion_matrix(actual, predict)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual, predict)
        recall = recall_score(actual, predict, average='weighted')
        pre = precision_score(actual, predict, average='weighted')

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result:', acc, recall, pre)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        # plt.figure(figsize=(10,7))
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_euclid.eps', format='eps', dpi=1200)

        ### Ignore unknown
        plt.clf()
        classes = [s.replace('\n', '') for s in recog.classes]

        actual_without_unknown = []
        predict_without_unknown = []

        unknown_count = 0
        for i in range(len(actual)):
            if predict[i] != 'unknown':
                actual_without_unknown.append(actual[i])
                predict_without_unknown.append(predict[i])
            else:
                unknown_count += 1

        cfm = confusion_matrix(actual_without_unknown, predict_without_unknown)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual_without_unknown, predict_without_unknown)
        recall = recall_score(actual_without_unknown, predict_without_unknown, average='weighted')
        pre = precision_score(actual_without_unknown, predict_without_unknown, average='weighted')
        unknown_rate = unknown_count / len(predict)

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result_without_unknown:', acc, recall, pre)
        print('unknown_rate:', unknown_rate)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_euclid_without_unknown.eps', format='eps', dpi=1200)

    ### End Euclid ###

    ### SVM ###
    if method[1]:
        plt.clf()
        recog = FaceRecognition(
            feature_extractor_type='dlib',
            classifier_method='svm',
            model_path = kwargs['SVM_MODEL']
        )

        classes = [s.replace('\n', '') for s in recog.svm_clf.classes_]
        classes.append('unknown')

        actual = []
        predict = []

        def process(samples):
            actual = []
            predict = []
            with tqdm(total=len(samples)) as pbar:
                for feature, label in samples:
                    predict_id = recog.recog([feature], threshold=threshold)
                    predict_id = predict_id[0].split('\n')[0]
                    actual.append(label)
                    predict.append(predict_id)
                    pbar.update(1)
            return (actual, predict)

        # samples_batch = split_into(samples, 6)

        # with multiprocessing.Pool(6) as pool:
        #     return_value = pool.map(process, samples_batch)
        #     r1, r2 = list(zip(*return_value))
        #     actual.extend(r1)
        #     predict.extend(r2)
        actual, predict = process(samples)

        cfm = confusion_matrix(actual, predict)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual, predict)
        recall = recall_score(actual, predict, average='weighted')
        pre = precision_score(actual, predict, average='weighted')

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result:', acc, recall, pre)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_svm.eps', format='eps', dpi=1200)


        ### Ignore unknown
        plt.clf()
        classes = [s.replace('\n', '') for s in recog.svm_clf.classes_]

        actual_without_unknown = []
        predict_without_unknown = []

        unknown_count = 0
        for i in range(len(actual)):
            if predict[i] != 'unknown':
                actual_without_unknown.append(actual[i])
                predict_without_unknown.append(predict[i])
            else:
                unknown_count += 1

        cfm = confusion_matrix(actual_without_unknown, predict_without_unknown)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual_without_unknown, predict_without_unknown)
        recall = recall_score(actual_without_unknown, predict_without_unknown, average='weighted')
        pre = precision_score(actual_without_unknown, predict_without_unknown, average='weighted')
        unknown_rate = unknown_count / len(predict)

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result_without_unknown:', acc, recall, pre)
        print('unknown_rate:', unknown_rate)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_svm_without_unknown.eps', format='eps', dpi=1200)


    #### KNN #####
    if method[2]:
        plt.clf()
        recog = FaceRecognition(
            feature_extractor_type='dlib',
            classifier_method='knn',
            model_dir='/home/huy/face_recog/models/knn'
        )

        classes = recog.knn.classes_
        classes = [s.replace('\n', '') for s in classes]
        actual = []
        predict = []

        def process(samples):
            actual = []
            predict = []
            with tqdm(total=len(samples)) as pbar:
                for feature, label in samples:
                    predict_id = recog.recog([feature], threshold=threshold)
                    predict_id = predict_id[0].split('\n')[0]
                    actual.append(label)
                    predict.append(predict_id)
                    pbar.update(1)
            return (actual, predict)

        # samples_batch = split_into(samples, 6)

        # with multiprocessing.Pool(6) as pool:
        #     return_value = pool.map(process, samples_batch)
        #     r1, r2 = list(zip(*return_value))
        #     actual.extend(r1)
        #     predict.extend(r2)
        actual, predict = process(samples)

        cfm = confusion_matrix(actual, predict)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual, predict)
        recall = recall_score(actual, predict, average='weighted')
        pre = precision_score(actual, predict, average='weighted')

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result:', acc, recall, pre)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        # plt.figure(figsize=(10,7))
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_knn.eps', format='eps', dpi=1200)

        ### Ignore unknown
        plt.clf()
        classes = [s.replace('\n', '') for s in recog.knn.classes_]

        actual_without_unknown = []
        predict_without_unknown = []

        unknown_count = 0
        for i in range(len(actual)):
            if predict[i] != 'unknown':
                actual_without_unknown.append(actual[i])
                predict_without_unknown.append(predict[i])
            else:
                unknown_count += 1

        cfm = confusion_matrix(actual_without_unknown, predict_without_unknown)
        norm_cfm = confusion_matrix(actual, predict, labels=classes, normalize='true')
        acc = accuracy_score(actual_without_unknown, predict_without_unknown)
        recall = recall_score(actual_without_unknown, predict_without_unknown, average='weighted')
        pre = precision_score(actual_without_unknown, predict_without_unknown, average='weighted')
        unknown_rate = unknown_count / len(predict)

        for i in range(len(norm_cfm)):
            for k in range(len(norm_cfm)):
                norm_cfm[i][k] = round(norm_cfm[i][k], 2)

        print('result_without_unknown:', acc, recall, pre)
        print('unknown_rate:', unknown_rate)

        df_cm = pd.DataFrame(norm_cfm, classes, classes)
        sn.set(font_scale=0.1) # for label size
        sn.heatmap(df_cm, annot=True, annot_kws={"size": 1}) # font size

        # plt.show()
        plt.savefig(f'{test_set_type}/cfm_knn_without_unknown.eps', format='eps', dpi=1200)

    ### End KNN ###

In [24]:
test_type = ['wm', 'hm']
samples_list = []
# SVM_MODEL = svm_trained_model_path
SVM_MODEL = '/home/huy/face_recog/models/sgd/RBFSampler_losslog_04072020_181307.sgd'
rbf_feature = RBFSampler(gamma=1, random_state=1)
for t in test_type:
    samples = joblib.load(f'/home/huy/face_recog/encoded_data/aug5_test_{t}/features.joblib')
    gaussian_features = rbf_feature.fit_transform(samples)
    run_evaluation(gaussian_features, t, threshold=0.0, method=[False, True, False], SVM_MODEL=SVM_MODEL)
#     run_evaluation(samples, t, threshold=0.0, method=[False, True, False], SVM_MODEL=SVM_MODEL)

  0%|          | 0/24000 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/home/huy/capstone/godofeye/lib/blueeyes/face_recognition/recognition.py", line 245, in _svm_recog
    probas_list = self.svm_clf.predict_proba(features)
  File "/home/huy/venv/cv/lib/python3.8/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 1002, in _predict_proba
    return self._predict_proba_lr(X)
  File "/home/huy/venv/cv/lib/python3.8/site-packages/sklearn/linear_model/_base.py", line 307, in _predict_proba_lr
    prob = self.decision_function(X)
  File "/home/huy/venv/cv/lib/python3.8/site-packages/sklearn/linear_model/_base.py", line 272, in decision_function
    raise ValueError("X has %d features per sample; expecting %d"
ValueError: X has 128 features per sample; expecting 100
  0%|          | 0/24000 [00:00<?, ?it/s]


IndexError: list index out of range

<Figure size 432x288 with 0 Axes>

In [15]:
def split_into(lst, n_part):
    parts = []
    part_size = len(lst)//n_part
    i = 0
    while i < n_part:
        parts.append(lst[i*part_size:(i*part_size+part_size)])
        i += 1
    for i in range(len(lst) % n_part):
        key, value = lst[n_part*part_size+i]
        parts[i][key] = value
    return parts

In [None]:
recog = FaceRecognition(
    classifier_method='euclid',
    model_dir='/home/huy/face_recog/models/simple_distance/'
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.9)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)

In [None]:
recog = FaceRecognition(
    model_dir='/home/huy/face_recog/models/knn/', 
   classifier_method='knn'
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.5)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)

In [None]:
recog = FaceRecognition(
    model_dir='/home/huy/models/simple_distance/',
    feature_extractor_type='face_recognition'
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.5)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)