In [2]:
import os
import sys
import cv2
import glob
import numpy as np
from time import time
from queue import Queue
from collections import namedtuple

sys.path.append('/home/huy/code/godofeye/lib')
sys.path.append('/home/huy/code/godofeye/lib/yoloface')

from blueeyes.face_recognition import FaceDetector, FaceRecognition, FeatureExtractor, ModelTraining
from blueeyes.utils import Camera

### Face Crop from Images (Optional)

In [None]:
from pathlib import Path

IMAGES_DIR = '/home/huy/data/face_recog/train_test_raw/'
OUTPUT_DIR = '/home/huy/data/face_recog/train_test'

detector = FaceDetector('mtcnn', min_face_size=50)

count = 0

for img_path in glob.glob(IMAGES_DIR + '/**/*.jpg', recursive=True):
    path = Path(img_path)
    id = path.parent.name
    im = cv2.imread(str(path), 1)
    boxes = detector.detect(im)
    for left,top,right,bottom in boxes:
        crop = im[top:bottom,left:right,:]
        output_dir = OUTPUT_DIR + f'/{id}'
        output_path = output_dir + f'/{count}.jpg'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        cv2.imwrite(output_path, crop)
        print('Write to ', output_path)
        count += 1

### Create Train Test Set

In [2]:
from pathlib import Path

train_set_dict = {}
test_set_dict = {}

TRAINSET_LOCATION = '/home/huy/smartbuilding/face_recog_models/dataset/CBGVDataset_v2/*/WM/train/*.jpg'
TESTSET_LOCATION = '/home/huy/smartbuilding/face_recog_models/dataset/CBGVDataset_v2/*/WM/test/*.jpg'

for path in glob.glob(TRAINSET_LOCATION):
    path = Path(path)
    id = path.parent.parent.parent.name
    if id not in train_set_dict.keys():
        train_set_dict[id] = []
    train_set_dict[id].append(str(path)) 
for path in glob.glob(TESTSET_LOCATION):
    path = Path(path)
    id = path.parent.parent.parent.name
    if id not in test_set_dict.keys():
        test_set_dict[id] = []
    test_set_dict[id].append(str(path))

# for entry in os.scandir('/home/huy/face_recog/dataset/Data v4.1/train_set_mix'):
#     id = entry.name
#     train_paths = []
#     test_paths = []
#     all_paths = glob.glob(os.path.join(entry.path, '*'))
#     np.random.shuffle(all_paths)
#     for path in all_paths[2:len(all_paths)]:
#         train_paths.append(os.path.abspath(path))
#     for path in all_paths[0:2]:
#         test_paths.append(os.path.abspath(path))
# #     for path in all_paths:
# #         train_paths.append(os.path.abspath(path))
#     train_set_dict[id] = train_paths
#     test_set_dict[id] = test_paths

In [3]:
# auto split train test
from pathlib import Path

RATIO = 0.7

all_set_dict = {}
train_set_dict = {}
test_set_dict = {}

TRAINSET_LOCATION = '/home/huy/Downloads/StaffDATA_v1(CBGVDataset_v3.1)/Aug2/**/*.jpg'

for path in glob.glob(TRAINSET_LOCATION, recursive=True):
    path = Path(path)
    id = path.parent.parent.parent.name
    if id not in all_set_dict.keys():
        all_set_dict[id] = []
    all_set_dict[id].append(str(path)) 

for label, paths in all_set_dict.items():
    n = int(len(paths)*RATIO)
    train_set_dict[label] = paths[0:n]
    test_set_dict[label] = paths[n:]

### Train the model

In [4]:
feature_extractor = FeatureExtractor('face_recognition')
model_trainer = ModelTraining(feature_extractor=feature_extractor)

In [None]:
model_trainer.create_train_set(train_set_dict, output_model_location='/home/huy/face_recog/encoded_data')

HBox(children=(FloatProgress(value=0.0, max=67400.0), HTML(value='')))

In [None]:
model_trainer.train_knn(train_set_dict, K=300, weights='uniform', output_model_location='/home/huy/face_recog/models/knn')

In [None]:
model_trainer.train_simple_model(train_set_dict, output_model_location='/home/huy/face_recog/models/simple_distance')

### Evaluate the model

In [8]:
recog = FaceRecognition(
    classifier_method='nn'
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.9)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)

num_samples	 TP_count	 UNK_count	
218 163 51
TP Rate  0.9760479041916168
UNK rate  0.23394495412844038


In [None]:
recog = FaceRecognition(
    model_dir='/home/huy/face_recog/models/knn/', 
    vggface=False, 
    use_knn=True,
    # retrain=False
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.5)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)

In [12]:
recog = FaceRecognition(
    model_dir='/home/huy/models/simple_distance/',
    feature_extractor_type='face_recognition'
)
TP_count = 0
UNK_count = 0
num_samples = 0
for id, img_paths in test_set_dict.items():
    for path in img_paths:
        img = cv2.imread(path, 1)
        predict_id = recog.recog(img,[[0,0,img.shape[1],img.shape[0]]], threshold=0.5)
        predict_id = predict_id[0][0].split('\n')[0]
        if predict_id == id:
            TP_count += 1
        elif predict_id == 'unknown':
            UNK_count +=1
        num_samples += 1
# TP rate don't care UNK
print('num_samples\t', 'TP_count\t', 'UNK_count\t')
print(num_samples, TP_count, UNK_count)
print('TP Rate ',TP_count/(num_samples-UNK_count))
# False rate
# print(1 - (TP_count+UNK_count)/num_samples
print('UNK rate ', UNK_count/num_samples)

num_samples	 TP_count	 UNK_count	
218 214 3
TP Rate  0.9953488372093023
UNK rate  0.013761467889908258


#### KNN classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import pickle
import numpy as np

In [None]:
KNN = pickle.load(open('/home/feature = [1]*128huy/Downloads/knn_clf.pkl', 'rb'))

In [None]:
feature = np.random.random((128,1))

In [None]:
x = KNN.predict_proba([feature])

In [None]:
knn = KNeighborsClassifier()

In [None]:
KNN.algorithm