In [1]:
# load the np array datasets

import numpy as np

# while loading we need to give the extension .npy. But not while saving.
data = np.load('data.npy')
target = np.load('target.npy')

print(data.shape)
print(target.shape)

(259, 10000)
(259,)


In [2]:
import collections
print(collections.Counter(target))    # check how many images are there in each label

Counter({0: 90, 1: 89, 2: 80})


In [3]:
import pandas as pd
my_data = pd.DataFrame(target,columns=['labels'])
my_data['labels'].unique()

array([0, 1, 2], dtype=int64)

In [4]:
from sklearn.model_selection import train_test_split
train_data,test_data,train_target,test_target = train_test_split(data,target,test_size = 0.2)

In [26]:
# now we cant apply 2500 features to our ML algo as its very big. So we use principle component analysis.
# it is a unsupervised learning algo which analysis the most important features/information and merges the complete data to 
# reduced number of features. for eg. data of 2500 features is stored in 150 features with minimal information loss.

# th number of features are decided using the cummulative variaince ratio i.e. we take number of features and plot the graph
# of cummulative variance where 1.0 denotes no info loss and below onwards represents info loss. So accordigly to achecive our \
# accuracy we can increase/decrease the components for PCA.

from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

# creating pca object -> using as it is in video.
pca = PCA(n_components=50,whiten=True,random_state=42)
svc = SVC()
model = make_pipeline(pca,svc)

In [27]:
model.fit(train_data,train_target)

Pipeline(steps=[('pca', PCA(n_components=50, random_state=42, whiten=True)),
                ('svc', SVC())])

In [28]:
prediction = model.predict(test_data)


from sklearn.metrics import accuracy_score,confusion_matrix
acc = accuracy_score(test_target,prediction)
confusion_matrix(test_target,prediction)

array([[13,  1,  0],
       [ 0, 21,  0],
       [ 3,  0, 14]], dtype=int64)

In [29]:
acc

0.9230769230769231

In [30]:
from sklearn.metrics import classification_report
print(classification_report(test_target,prediction,target_names=['obama','bush','kshitij']))

              precision    recall  f1-score   support

       obama       0.81      0.93      0.87        14
        bush       0.95      1.00      0.98        21
     kshitij       1.00      0.82      0.90        17

    accuracy                           0.92        52
   macro avg       0.92      0.92      0.92        52
weighted avg       0.93      0.92      0.92        52



In [31]:
import joblib
joblib.dump(model,'SVM-Face_Recognition.sav')

['SVM-Face_Recognition.sav']