In [0]:
import numpy
import os

**Mounting drive on hosted runtime**

In [10]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


**Loading training and test embeddings as well as train and test labels from hosted runtime**

In [0]:
train_emb=numpy.load('/content/gdrive/My Drive/train_embeddings.npy')
test_emb=numpy.load('/content/gdrive/My Drive/test_embeddings.npy')
train_labels=numpy.load('/content/gdrive/My Drive/train_labels.npy',allow_pickle=True)
test_labels=numpy.load('/content/gdrive/My Drive/test_labels.npy',allow_pickle=True)

**Pre-Processing Training and Test Labels**

In [0]:
train_labels_list=[]
test_labels_list=[]
for i in train_labels:
  train_labels_list.append(i[0])
for j in test_labels:
  test_labels_list.append(j[0])
train_labels=numpy.array(train_labels_list)
test_labels=numpy.array(test_labels_list)

**Scaling train and test feature vector using Standard Scaler for better convergence for svm**

In [0]:
from sklearn.preprocessing import StandardScaler
x_train_scale=StandardScaler()
x_test_scale=StandardScaler()
x_train_scaled=x_train_scale.fit_transform(train_emb)
x_test_scaled=x_test_scale.fit_transform(test_emb)

**Using Grid Search CV to get best parameters for C,gamma and kernel hyper-parameters for tuning**

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf','linear','poly']}  
  
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3) 
  
# fitting the model for grid search 
grid.fit(x_train_scaled, train_labels) 

Fitting 3 folds for each of 75 candidates, totalling 225 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.040, total=   9.6s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.6s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.041, total=   9.7s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   19.3s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.041, total=   9.8s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.324, total=   3.0s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.310, total=   3.0s
[CV] C=0.1, gamma=1, kernel=linear ...................................
[CV] ....... C=0.1, gamma=1, kernel=linear, score=0.302, total=   3.0s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ......... C=0.1, gamma=1, kernel=poly, score=0.287, total=   4.8s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ......... C=0.1, gamma=1, kernel=poly, score=0.275, total=   4.8s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ......... C=0.1, gamma=1, kernel=poly, score=0.265, total=   4.7s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] .

[Parallel(n_jobs=1)]: Done 225 out of 225 | elapsed: 91.4min finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear', 'poly']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

**Getting the best parameters and SVC initializer for GridSearch CV model**

In [16]:
print(grid.best_params_) 
print(grid.best_estimator_) 

{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


**Predicting class labels using the above GridSearch CV model and printing the classification report for each class label**

In [17]:
ypred=grid.predict(x_test_scaled)
print(classification_report(test_labels, ypred))

                     precision    recall  f1-score   support

                ANR       0.00      0.00      0.00        10
             Aarthi       0.00      0.00      0.00         6
        AkshayKumar       0.24      0.35      0.29        17
                Ali       0.17      0.29      0.21         7
            Ambresh       0.35      0.60      0.44        10
         AmrishPuri       0.60      0.33      0.43         9
         AnilKapoor       0.48      0.67      0.56        15
         Annapoorna       0.31      0.36      0.33        11
         AnupamKher       0.26      0.38      0.31        13
            Avinash       0.00      0.00      0.00         6
          BabuMohan       0.00      0.00      0.00         2
        Balakrishna       0.00      0.00      0.00         1
           Bharathi       0.00      0.00      0.00         5
         BomanIrani       0.00      0.00      0.00         5
       Brahmanandam       0.25      0.25      0.25         4
      Cochinhaneefa    

  'precision', 'predicted', average, warn_for)


**Getting the confusion matrix for the class labels and calculating the accuracy of each class label and displaying it as a list**

In [21]:
cm = confusion_matrix(test_labels, ypred)
cm = cm.astype('float') / cm.sum(axis=1)[:, numpy.newaxis]
print(cm.diagonal())

[0.         0.         0.35294118 0.28571429 0.6        0.33333333
 0.66666667 0.36363636 0.38461538 0.         0.         0.
 0.         0.         0.25       0.         0.33333333 0.
 0.42105263 0.14285714 0.375      0.16666667 0.         0.
 0.8        0.14285714 0.33333333 0.125      0.15       0.81818182
 0.2        0.         0.2        0.         0.25       0.625
 0.18181818 0.55555556 0.64       0.         0.42857143 0.
 0.66666667 0.25       0.09090909 0.25       0.         0.25
 0.64705882 0.05882353 1.         0.5        0.33333333 0.
 0.25       0.39130435 0.         0.         0.53333333 0.5
 0.35294118 0.83333333 0.21428571 0.3        0.5        0.11111111
 0.         0.         0.09090909 0.42307692 0.         0.
 0.44444444 0.2        0.         0.         0.66666667 0.57142857
 0.11111111 0.         0.53846154 0.25       0.        ]


In [26]:
from sklearn.externals import joblib
joblib.dump(grid,'FaceRecognitionSVMClassifier.pkl')



['FaceRecognitionSVMClassifier.pkl']