In [1]:
import numpy as np
import pandas as pd

#
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [2]:
# load numpy array
data = np.load('./data/data_pca_50_target.npz')
data.files

['arr_0', 'arr_1']

In [3]:
data.allow_pickle = True


In [4]:
X = data['arr_0'] # pca data with 50 components
y = data['arr_1'] # target or dependent variable

In [5]:
X.shape

(4320, 50)

In [6]:
y.shape

(4320,)

In [7]:
X

array([[ 0.87739254,  0.26123435,  0.25160756, ...,  0.79367915,
         0.41591923,  0.92798791],
       [-0.6137835 , -1.56227596,  0.15194091, ...,  0.41253052,
         2.05608752,  0.50593841],
       [-0.78424799, -0.31739287,  0.3570641 , ..., -1.14560118,
         0.41605978, -1.97031982],
       ...,
       [ 1.33758048,  0.60780422, -1.24628551, ...,  0.80665073,
         1.83504101, -0.97708138],
       [-1.23700596, -0.44496982, -0.43109771, ..., -0.67726198,
         0.56710117,  0.80030109],
       [ 1.24356042,  0.93916037,  0.10404034, ..., -0.34105388,
        -0.15601753, -0.02441573]])

In [8]:
y

array(['female', 'female', 'female', ..., 'male', 'male', 'male'],
      dtype=object)

In [7]:
# split the data into two parts: traind and test
x_train, x_test, y_train, y_test = train_test_split(X, y , test_size = 0.2, stratify=y)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(3456, 50) (864, 50) (3456,) (864,)


In [11]:
# 3456 images for train and 864 images for test!

#### Training Machine Learning Model

In [12]:
model_svc = SVC()

param_grid = {'C':[0.5,1,10,20,30,50],
             'kernel':['rbf', 'poly'],
             'gamma':[0.1,0.05,0.01,0.001,0.002,0.005],
              'probability':[True],
             'coef0':[0,1]}

In [13]:
model_grid = GridSearchCV(model_svc, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                         cv=3,
                         verbose=2)

In [14]:
model_grid.fit(x_train,y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.7s
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.7s
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.7s
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.1s
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.1s
[CV] END C=0.5, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.0s
[CV] END C=0.5, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.4s
[CV] END C=0.5, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.5s
[CV] END C=0.5, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.4s
[CV] END C=0.5, coef0=0, gamma=0.05, kernel=poly, probability=True; total time=   1.0s
[CV] END C=0.5, coef0=0, gamma=0.05, kernel=poly, probability=True; total time=

[CV] END C=1, coef0=0, gamma=0.001, kernel=poly, probability=True; total time=   0.8s
[CV] END C=1, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=1, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=1, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=1, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.9s
[CV] END C=1, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.9s
[CV] END C=1, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.8s
[CV] END C=1, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=1, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.1s
[CV] END C=1, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.1s
[CV] END C=1, coef0=0, gamma=0.005, kernel=poly, probability=True; total time=   0.9s
[CV] END C=1, coef0=0, gamma=0.005, kernel=poly, probability

[CV] END C=10, coef0=1, gamma=0.01, kernel=rbf, probability=True; total time=   1.3s
[CV] END C=10, coef0=1, gamma=0.01, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=10, coef0=1, gamma=0.01, kernel=rbf, probability=True; total time=   1.2s
[CV] END C=10, coef0=1, gamma=0.01, kernel=poly, probability=True; total time=   1.0s
[CV] END C=10, coef0=1, gamma=0.01, kernel=poly, probability=True; total time=   1.0s
[CV] END C=10, coef0=1, gamma=0.01, kernel=poly, probability=True; total time=   0.9s
[CV] END C=10, coef0=1, gamma=0.001, kernel=rbf, probability=True; total time=   1.0s
[CV] END C=10, coef0=1, gamma=0.001, kernel=rbf, probability=True; total time=   1.0s
[CV] END C=10, coef0=1, gamma=0.001, kernel=rbf, probability=True; total time=   1.0s
[CV] END C=10, coef0=1, gamma=0.001, kernel=poly, probability=True; total time=   0.6s
[CV] END C=10, coef0=1, gamma=0.001, kernel=poly, probability=True; total time=   0.6s
[CV] END C=10, coef0=1, gamma=0.001, kernel=poly, proba

[CV] END C=30, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.1, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.1s
[CV] END C=30, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.1s
[CV] END C=30, coef0=0, gamma=0.1, kernel=poly, probability=True; total time=   1.0s
[CV] END C=30, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.05, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=30, coef0=0, gamma=0.05, kernel=poly, probability=True; total time=   1.1s
[CV] END C=30, coef0=0, gamma=0.05, kernel=poly, probability=True; total time=   1.0s
[CV] END C=30, coef0=0, gamma=0.05, kernel=poly, probability=True;

[CV] END C=50, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.3s
[CV] END C=50, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.3s
[CV] END C=50, coef0=0, gamma=0.002, kernel=rbf, probability=True; total time=   1.3s
[CV] END C=50, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.9s
[CV] END C=50, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.9s
[CV] END C=50, coef0=0, gamma=0.002, kernel=poly, probability=True; total time=   0.9s
[CV] END C=50, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.6s
[CV] END C=50, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.5s
[CV] END C=50, coef0=0, gamma=0.005, kernel=rbf, probability=True; total time=   1.5s
[CV] END C=50, coef0=0, gamma=0.005, kernel=poly, probability=True; total time=   0.9s
[CV] END C=50, coef0=0, gamma=0.005, kernel=poly, probability=True; total time=   0.9s
[CV] END C=50, coef0=0, gamma=0.005, kernel=poly,

In [15]:
model_grid.best_params_


{'C': 50, 'coef0': 0, 'gamma': 0.002, 'kernel': 'rbf', 'probability': True}

In [16]:
model_final = model_grid.best_estimator_


In [17]:
model_final.get_params()

{'C': 50,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.002,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

## Model Evaluation
- Classification Report
  - Precision, Recall, f1 Score
- Kappa Score
  - -ve(worst model)
  - 0 to 0.5 (bad model)
  - 0.5 to 0.7 (good model)
  - 0.7 to 0.9 (excellent model)
  - 0.9 to 1.0 (perfect model)
- AUC
  - Less than 0.5 (worst model)
  - 0.5 to 0.6 (bad model)
  - 0.6 to 0.8 (good model)
  - 0.8 to 0.9 (excellent model)
  - 0.9 to 1.0 (perfect model)

In [18]:
y_pred = model_final.predict(x_test) # predicted values 

In [19]:
y_pred

array(['male', 'male', 'female', 'male', 'male', 'male', 'male', 'male',
       'male', 'female', 'female', 'male', 'female', 'female', 'female',
       'female', 'male', 'male', 'female', 'female', 'male', 'female',
       'female', 'male', 'male', 'female', 'female', 'male', 'male',
       'female', 'male', 'male', 'male', 'female', 'male', 'female',
       'female', 'female', 'male', 'female', 'male', 'male', 'female',
       'female', 'male', 'male', 'female', 'female', 'female', 'male',
       'female', 'female', 'male', 'male', 'male', 'male', 'male',
       'female', 'female', 'female', 'female', 'female', 'female',
       'female', 'female', 'female', 'male', 'male', 'female', 'female',
       'female', 'male', 'female', 'female', 'male', 'male', 'male',
       'male', 'male', 'male', 'female', 'female', 'female', 'female',
       'male', 'female', 'female', 'female', 'female', 'female', 'female',
       'male', 'female', 'male', 'male', 'female', 'female', 'male',
       'male

#### Classification Report

In [20]:
cr = metrics.classification_report(y_test, y_pred, output_dict = True) # give true values and predicted values as parameters
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
female,0.808017,0.802935,0.805468,477.0
male,0.758974,0.764858,0.761905,387.0
accuracy,0.78588,0.78588,0.78588,0.78588
macro avg,0.783496,0.783896,0.783686,864.0
weighted avg,0.78605,0.78588,0.785955,864.0


#### Kappa Score

In [21]:
metrics.cohen_kappa_score(y_test, y_pred) # nearly is equal to 0.6 -> good model

0.5673779596609179

**Area Under Curve (AUC)**

In [22]:
metrics.roc_auc_score(np.where(y_test=="male",1,0), 
                      np.where(y_pred=="male",1,0))
# 0.7973 -> nearly 0.8 -> nearly excellent model

0.7838964458095655

### Save the face recognition model 

In [23]:
import pickle
pickle.dump(model_final, open('./model/model_svm.pickle', mode='wb'))