In [1]:
import numpy as np;
import pickle
import pandas as pd 

In [None]:
# LOAD
# SPLIT 
# TRAIN AND TEST

In [2]:
data = pickle.load(open('./data_pickle.pickle', mode='rb'))

In [3]:
x = np.array(data['data'])   # independent var
y = np.array(data['label'])  # dependent var

In [4]:
x.shape, y.shape 

((1219, 1, 128), (1219,))

In [5]:
x = x.reshape(-1, 128)
x.shape

(1219, 128)

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, train_size=0.8, random_state = 0)

In [8]:
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((975, 128), (244, 128), (975,), (244,))

In [9]:
# train ml

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [12]:
def get_report(model, xtrain, xtest, ytrain, ytest):
    y_pred_train = model.predict(xtrain)
    y_pred_test = model.predict(xtest)

    # accuracy 
    acc_train = accuracy_score(ytrain, y_pred_train)
    acc_test = accuracy_score(ytest, y_pred_test)

    # f1 score 
    f1_score_train = f1_score(ytrain, y_pred_train, average='macro')
    f1_score_test = f1_score(ytest, y_pred_test, average='macro')

    print("Accuracy train: ", acc_train)
    print("Accuracy test: ", acc_test)
    print("F1 score train: ", f1_score_train)
    print("F1 score test: ", f1_score_test)

In [11]:
model_KNN = KNeighborsClassifier(n_neighbors=3)
model_KNN.fit(xtrain, ytrain)

KNeighborsClassifier(n_neighbors=3)

In [13]:
get_report(model_KNN, xtrain, xtest, ytrain, ytest)

Accuracy train:  0.5323076923076923
Accuracy test:  0.21721311475409835
F1 score train:  0.5183802629632687
F1 score test:  0.20749959084653835


In [None]:
# Logistic Regression

In [9]:
model_logistic = LogisticRegression()
model_logistic.fit(xtrain, ytrain)

LogisticRegression()

In [11]:
get_report(model_logistic,  xtrain, xtest, ytrain, ytest)

Accuracy train:  0.3292307692307692
Accuracy test:  0.20491803278688525
F1 score train:  0.30890693477486575
F1 score test:  0.18544831339221496


In [17]:
# Support Vector Machines

In [12]:
model_svc = SVC(probability = True)
model_svc.fit(xtrain, ytrain)

SVC(probability=True)

In [13]:
get_report(model_svc,  xtrain, xtest, ytrain, ytest)

Accuracy train:  0.5066666666666667
Accuracy test:  0.2459016393442623
F1 score train:  0.5104390016146961
F1 score test:  0.23359124110046395


In [None]:
# Random forest

In [16]:
model_rf = RandomForestClassifier(n_estimators=10)
model_rf.fit(xtrain, ytrain)

RandomForestClassifier(n_estimators=10)

In [17]:
get_report(model_rf,  xtrain, xtest, ytrain, ytest)

Accuracy train:  0.9948717948717949
Accuracy test:  0.1598360655737705
F1 score train:  0.9951796437268728
F1 score test:  0.14440872741701455


In [None]:
# Voting classifier 

In [21]:
model_voting = VotingClassifier(estimators=[
    ('logistic', LogisticRegression()),
    ('svm', SVC(probability=True)),
    ('rf', RandomForestClassifier())
], voting='soft', weights=[2, 3, 1])

In [22]:
model_voting.fit(xtrain, ytrain)

VotingClassifier(estimators=[('logistic', LogisticRegression()),
                             ('svm', SVC(probability=True)),
                             ('rf', RandomForestClassifier())],
                 voting='soft', weights=[2, 3, 1])

In [23]:
get_report(model_voting, xtrain, xtest, ytrain, ytest)

Accuracy train:  0.8912820512820513
Accuracy test:  0.25
F1 score train:  0.893027389315306
F1 score test:  0.22199888172774712


In [None]:
# Parameter tuning

In [24]:
from sklearn.model_selection import GridSearchCV

In [27]:
model_grid = GridSearchCV(model_voting,
                         param_grid={
                             'svm__C' : [3, 5, 7, 10],
                             'svm__gamma' : [0.1, 0.3, 0.5],
                             'rf__n_estimators' : [5, 10, 20],
                             'rf__max_depth' : [3, 5, 7],
                             'voting' : ['soft', 'hard']
                         }, scoring='accuracy', cv=3, n_jobs=1, verbose=2)

In [28]:
model_grid.fit(xtrain, ytrain)

Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.0s
[

[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=hard; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=10, svm__C=3

[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=hard; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=hard; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=3, svm__gamma=0.5, voting=hard; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=5, svm__gamma=0.1, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=5, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=5, svm__gamma=0.1, voting=soft; total time=   1.1s
[CV] END rf__max_depth=3, rf__n_estimators=20, svm__C=5

[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=soft; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=soft; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.3, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=5, svm__C=5, svm__gam

[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.3, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.3, voting=hard; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.3, voting=hard; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.5, voting=hard; total time=   1.0s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5, svm__gamma=0.5, voting=hard; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=10, svm__C=5

[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=5, svm__gamma=0.5, voting=soft; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=5, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=5, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=5, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7, svm__gamma=0.1, voting=soft; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7, svm__gamma=0.1, voting=hard; total time=   1.1s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7, svm__gamma=0.1, voting=hard; total time=   1.2s
[CV] END rf__max_depth=5, rf__n_estimators=20, svm__C=7

[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.1, voting=hard; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=soft; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=soft; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=soft; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=hard; total time=   1.0s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gamma=0.3, voting=hard; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=5, svm__C=7, svm__gam

[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.3, voting=hard; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.3, voting=hard; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=soft; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=soft; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=7, svm__gamma=0.5, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=10, svm__gamma=0.1, voting=soft; total time=   1.1s
[CV] END rf__max_depth=7, rf__n_estimators=10, svm__C=

[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=7, svm__gamma=0.5, voting=hard; total time=   1.3s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=7, svm__gamma=0.5, voting=hard; total time=   1.3s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=soft; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.1, voting=hard; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, svm__C=10, svm__gamma=0.3, voting=soft; total time=   1.2s
[CV] END rf__max_depth=7, rf__n_estimators=20, s

GridSearchCV(cv=3,
             estimator=VotingClassifier(estimators=[('logistic',
                                                     LogisticRegression()),
                                                    ('svm',
                                                     SVC(probability=True)),
                                                    ('rf',
                                                     RandomForestClassifier())],
                                        voting='soft', weights=[2, 3, 1]),
             n_jobs=1,
             param_grid={'rf__max_depth': [3, 5, 7],
                         'rf__n_estimators': [5, 10, 20],
                         'svm__C': [3, 5, 7, 10], 'svm__gamma': [0.1, 0.3, 0.5],
                         'voting': ['soft', 'hard']},
             scoring='accuracy', verbose=2)

In [31]:
model_best_estimator = model_grid.best_estimator_

In [30]:
model_grid.best_score_

0.2687179487179487

In [None]:
# Save pickle

In [33]:
pickle.dump(model_best_estimator, open('../models/ml_face_identity.pkl', mode='wb'))