### 1. Setting paths for datasets

In [1]:
import os 
import numpy as np

# Anchor both dataset splits to the directory the notebook was launched from.
cwd = os.getcwd()

# The relative fragments are kept verbatim (including the trailing slash on
# the validation folder) so the resulting path strings are unchanged.
train_path, validation_path = (
    os.path.join(cwd, relative_dir)
    for relative_dir in (
        'dataset/combined_5celebs_family/train',
        'dataset/combined_5celebs_family/val/',
    )
)

### 2. Loading the training data

In [2]:
from face_recog.io import ImageDataset

# Build a dataset reader rooted at the training folder and load it into two
# parallel collections: images (X_train) and their labels (y_train).
# NOTE(review): `convert_xy=True` presumably selects the (X, y) return form —
# confirm against face_recog.io.ImageDataset.
data_train = ImageDataset(train_path)
X_train, y_train = data_train.load_data(convert_xy=True)

In [3]:
# Images and labels should come back in equal numbers.
print('Number of images:', len(X_train))
print('Number of labels:', len(y_train))

# Sanity check: show the label and array shape of the first sample only.
if len(X_train) > 0:
    print('Label:', y_train[0], 'Shape:', X_train[0].shape)

Number of images: 187
Number of labels: 187
Label: aditya Shape: (1471, 1140, 3)


### 3. Creating a Pipeline

In [4]:
from face_recog.detector import FaceDetectorTransformer
from face_recog.recognizer import FacenetEmbeddingsTransformer
from sklearn.preprocessing import Normalizer
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline

# Individual stages, listed in the order the pipeline applies them.
# NOTE(review): `final_size=(160, 160)` is presumably the size of the face crop
# handed to the embedding model — confirm in face_recog.detector.
detector = FaceDetectorTransformer(final_size=(160, 160))
recognizer = FacenetEmbeddingsTransformer(model_path='models/keras/facenet_keras.h5')
# L2-normalize each embedding vector before classification.
norm = Normalizer(norm='l2')
# Classifier left at its library defaults.
clf = XGBClassifier()

# The step names below become the prefixes of the pipeline's parameter names
# (e.g. `classifier__max_depth`).
pipeline = Pipeline(steps=[
    ('detector', detector),
    ('face_embedding_transformers', recognizer),
    ('normalizer', norm),
    ('classifier', clf),
])

# Fit every stage end to end; kept as the last expression so the fitted
# pipeline is echoed as the cell output.
pipeline.fit(X_train, y_train)

Using TensorFlow backend.




Pipeline(memory=None,
         steps=[('detector',
                 <face_recog.detector.FaceDetectorTransformer object at 0x00000182702D92E8>),
                ('face_embedding_transformers',
                 <face_recog.recognizer.FacenetEmbeddingsTransformer object at 0x00000182702D3F28>),
                ('normalizer', Normalizer(copy=True, norm='l2')),
                ('classifier',
                 XGBClassifier(base_score=0.5, booster=None,
                               colsample_bylevel=1, col...
                               interaction_constraints=None,
                               learning_rate=0.300000012, max_delta_step=0,
                               max_depth=6, min_child_weight=1, missing=nan,
                               monotone_constraints=None, n_estimators=100,
                               n_jobs=0, num_parallel_tree=1,
                               objective='multi:softprob', random_state=0,
                               reg_alpha=0, reg_lambda=1, scal

In [5]:
# Inspect the (name, estimator) pairs that make up the fitted pipeline.
pipeline.steps

[('detector', <face_recog.detector.FaceDetectorTransformer at 0x182702d92e8>),
 ('face_embedding_transformers',
  <face_recog.recognizer.FacenetEmbeddingsTransformer at 0x182702d3f28>),
 ('normalizer', Normalizer(copy=True, norm='l2')),
 ('classifier',
  XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
                colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
                importance_type='gain', interaction_constraints=None,
                learning_rate=0.300000012, max_delta_step=0, max_depth=6,
                min_child_weight=1, missing=nan, monotone_constraints=None,
                n_estimators=100, n_jobs=0, num_parallel_tree=1,
                objective='multi:softprob', random_state=0, reg_alpha=0,
                reg_lambda=1, scale_pos_weight=None, subsample=1,
                tree_method=None, validate_parameters=False, verbosity=None))]

In [6]:
# Same inspection as the preceding cell; the displayed steps are unchanged.
pipeline.steps

[('detector', <face_recog.detector.FaceDetectorTransformer at 0x182702d92e8>),
 ('face_embedding_transformers',
  <face_recog.recognizer.FacenetEmbeddingsTransformer at 0x182702d3f28>),
 ('normalizer', Normalizer(copy=True, norm='l2')),
 ('classifier',
  XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
                colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
                importance_type='gain', interaction_constraints=None,
                learning_rate=0.300000012, max_delta_step=0, max_depth=6,
                min_child_weight=1, missing=nan, monotone_constraints=None,
                n_estimators=100, n_jobs=0, num_parallel_tree=1,
                objective='multi:softprob', random_state=0, reg_alpha=0,
                reg_lambda=1, scale_pos_weight=None, subsample=1,
                tree_method=None, validate_parameters=False, verbosity=None))]

In [10]:
# List every parameter name the pipeline exposes. Nested estimator parameters
# use the `<step name>__<parameter>` form (e.g. `classifier__max_depth`).
pipeline.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'detector', 'face_embedding_transformers', 'normalizer', 'classifier', 'normalizer__copy', 'normalizer__norm', 'classifier__objective', 'classifier__base_score', 'classifier__booster', 'classifier__colsample_bylevel', 'classifier__colsample_bynode', 'classifier__colsample_bytree', 'classifier__gamma', 'classifier__gpu_id', 'classifier__importance_type', 'classifier__interaction_constraints', 'classifier__learning_rate', 'classifier__max_delta_step', 'classifier__max_depth', 'classifier__min_child_weight', 'classifier__missing', 'classifier__monotone_constraints', 'classifier__n_estimators', 'classifier__n_jobs', 'classifier__num_parallel_tree', 'classifier__random_state', 'classifier__reg_alpha', 'classifier__reg_lambda', 'classifier__scale_pos_weight', 'classifier__subsample', 'classifier__tree_method', 'classifier__validate_parameters', 'classifier__verbosity'])

In [15]:
# Look the embedding step up by its registered name rather than by position.
# NOTE(review): `discard_model()` presumably drops the loaded Keras model so
# the pipeline can be pickled — confirm in face_recog.recognizer.
# NOTE(review): the execution counts suggest this cell was run after the
# validation cells further down; verify that a top-to-bottom run still
# predicts correctly once the model has been discarded.
pipeline.named_steps['face_embedding_transformers'].discard_model()

In [16]:
import pickle

# Serialize the whole pipeline to disk with the newest pickle protocol
# supported by this interpreter.
with open('face_recog_pipeline.pkl', 'wb') as output:
    pickle.dump(pipeline, output, protocol=pickle.HIGHEST_PROTOCOL)

### 4. Validation

#### Loading data

In [6]:
# Load the held-out validation split with the same reader used for training.
data_val = ImageDataset(validation_path)
X_val, y_val = data_val.load_data(convert_xy=True)

# Images and labels should come back in equal numbers.
print('Number of images:', len(X_val))
print('Number of labels:', len(y_val))

# Sanity check: show the label and array shape of the first sample only.
if len(X_val) > 0:
    print('Label:', y_val[0], 'Shape:', X_val[0].shape)

Number of images: 49
Number of labels: 49
Label: aditya Shape: (837, 1224, 3)


In [7]:
# Run the full fitted pipeline on the validation images.
# NOTE(review): the message logged by this cell reports a face-detection
# failure for image 30, and the scoring cell drops that label before comparing,
# so y_pred appears to contain no entry for that sample — confirm in
# face_recog.detector.
y_pred = pipeline.predict(X_val)

Error finding bounding box for IMG NUM (30): 
 not enough values to unpack (expected 3, got 2)


In [8]:
from sklearn.metrics import accuracy_score

# Validation images for which face detection failed get no prediction (see the
# "Error finding bounding box" message logged by predict), so their labels
# have to be removed before scoring.
failed_detection_idx = {30}

# Filter only while labels and predictions are still misaligned. This keeps the
# cell safe to re-run: an unconditional second filter would drop a valid label
# and leave y_val shorter than y_pred.
if len(y_val) != len(y_pred):
    y_val = [label for idx, label in enumerate(y_val)
             if idx not in failed_detection_idx]

print('Accuracy:', accuracy_score(y_val, y_pred)*100, '%')

Accuracy: 95.83333333333334 %


In [15]:
# Inspect the first validation sample. Only the last expression of a cell is
# echoed, so the `type(...)` result is not shown — just the shape.
type(X_val[0])
X_val[0].shape

(837, 1224, 3)