In [26]:
# Import các thư viện cần thiết
import numpy as np
import cv2
import pandas as pd
import os
import pickle
from sklearn.model_selection import train_test_split
from tools import *

In [27]:
path = "D:/workspace/OpenCV/"
# hiển thị tất cả thư mục hoặc file
os.listdir(path)

['data',
 'data_preprocessing.ipynb',
 'faces',
 'models',
 'pipline.ipynb',
 'tools.py',
 'traning_data.ipynb',
 '__pycache__']

In [28]:
data = pickle.load(open('./data/data_face_features.pickle', mode="rb"))

In [29]:
x = np.array(data['data'])
y = np.array(data['label'])

print(x)
print(y)

[[[ 0.08582852  0.04553367 -0.05129068 ... -0.05451423  0.05332796
    0.01897972]]

 [[ 0.03542405  0.06168718 -0.12150856 ...  0.01143151 -0.05509765
   -0.02112115]]

 [[ 0.0859497  -0.05133062  0.02860345 ... -0.00365922 -0.07709135
   -0.09461875]]

 ...

 [[ 0.11747491  0.08736821  0.04978189 ... -0.01546835  0.06892789
   -0.10004354]]

 [[ 0.01590434  0.20860533  0.02372206 ... -0.00772081  0.11865498
    0.03948917]]

 [[ 0.04519274  0.14802817 -0.042697   ...  0.04653334  0.05841489
    0.01750706]]]
['HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon'
 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'HariWon'
 'HariWon' 'HariWon' 'HariWon' 'HariWon' 'KhoiMy' 'KhoiMy' 'KhoiMy'
 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy'
 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'KhoiMy' 'MinhHang' 'MinhHang'
 'MinhHang' 'MinhHang' 'MinhHang' 'MinhHang' 'MinhHang' 'MinhHang'
 'MinhHang' 'MinhHang' 'NgocTrinh' 'NgocTrinh' 'NgocTrinh' 'NgocTr

In [30]:
print(x.shape)
print(y.shape)


(87, 1, 128)
(87,)


In [31]:

x = x.reshape(-1, 128)

In [32]:

print(x.shape)

(87, 128)


In [33]:
# chia nhỏ dữ liệu
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=0)

In [34]:

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((69, 128), (18, 128), (69,), (18,))

In [35]:

# Train machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score


In [36]:
# Logistic regression
model_logistic = LogisticRegression()
model_logistic.fit(x_train, y_train) # training

In [37]:
def get_report(model, x_train, y_train, x_test, y_test):
  y_pred_train = model.predict(x_train)
  y_pred_test = model.predict(x_test)

  # accuracy score
  acc_train = accuracy_score(y_train, y_pred_train)
  acc_test = accuracy_score(y_test, y_pred_test)

  # f1 score
  f1_score_train = f1_score(y_train, y_pred_train, average="macro")
  f1_score_test = f1_score(y_test, y_pred_test, average="macro")

  # print
  print('Accurency Train = %0.2f'%acc_train)
  print('Accurency Test = %0.2f'%acc_test)
  print('F1 Score Train = %0.2f'%f1_score_train)
  print('F1 Score Test = %0.2f'%f1_score_test)

In [38]:
get_report(model_logistic, x_train, y_train, x_test, y_test)

Accurency Train = 0.67
Accurency Test = 0.39
F1 Score Train = 0.54
F1 Score Test = 0.42


In [39]:
# Support Vector Machines
model_svc = SVC(probability=True)
model_svc.fit(x_train, y_train)

In [40]:
get_report(model_svc, x_train, y_train, x_test, y_test)

Accurency Train = 0.72
Accurency Test = 0.50
F1 Score Train = 0.64
F1 Score Test = 0.52


In [41]:
# Random forest
model_rf = RandomForestClassifier(n_estimators=20,)
model_rf.fit(x_train, y_train)

In [42]:
get_report(model_rf, x_train, y_train, x_test, y_test)

Accurency Train = 1.00
Accurency Test = 0.61
F1 Score Train = 1.00
F1 Score Test = 0.63


In [43]:
# Voting Classifier
model_voting = VotingClassifier(estimators=[
    ('logistic', LogisticRegression()),
    ('svm', SVC(probability=True)),
    ('rf', RandomForestClassifier(n_estimators=20,)),
], voting='soft', weights=[2,3,1])

In [44]:
model_voting.fit(x_train, y_train)

In [45]:
get_report(model_voting, x_train, y_train, x_test, y_test)

Accurency Train = 0.93
Accurency Test = 0.56
F1 Score Train = 0.93
F1 Score Test = 0.57


In [46]:
# Parameter Tuning
from sklearn.model_selection import GridSearchCV

model_grid = GridSearchCV(model_voting, param_grid={
    'svm__C': [3,5,7,10],
    'svm__gamma': [0.1, 0.3, 0.5],
    'rf__n_estimators': [5, 10,20],
    'rf__max_depth': [3, 5, 7],
    'voting': ['soft', 'hard']
}, scoring='accuracy', cv=3, n_jobs=1, verbose=2)

In [47]:
model_grid.fit(x_train, y_train)

Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   0.0s
[

[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.5, voting=hard; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=soft; total time=   0.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=5, svm__gam

In [48]:
model_best_estimator = model_grid.best_estimator_

In [49]:
model_grid.best_score_

0.6521739130434782

In [50]:
# Save Model
pickle.dump(model_best_estimator, open("./data/ml_face_person_identity.pkl", mode='wb'))