In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.preprocessing import LabelEncoder
import joblib

### Load the embeddings and labels from .npz file

In [2]:
data = np.load('../vgg16_face_embeddings.npz')

In [3]:
embeddings, labels = data['embeddings'], data['labels']

In [4]:
embeddings.shape

(130, 25088)

In [5]:
labels

array(['steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard',
       'steven_gerrard', 'steven_gerrard', 'steven_gerrard', 'mo_salah',
       'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah',
       'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah',
       'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah',
       'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah',
       'mo_salah', 'mo_salah', 'mo_salah', 'mo_salah',
       'christiano_ronaldo', 'christiano_ronaldo', 'chr

### Encode labels to numerical

In [6]:
le = LabelEncoder()
labels = le.fit_transform(labels)

In [7]:
# Save the encoder in a joblib file
joblib.dump(le, '../models/encoder.joblib')

['../models/encoder.joblib']

### Split the data into train and test

In [8]:
X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)

### Train the model

In [9]:
# Build the svm classifier
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

### Evaluate the model

In [10]:
y_pred = model.predict(X_test)

In [11]:
y_pred

array([0, 2, 3, 2, 3, 0, 0, 1, 1, 4, 1, 3, 0, 4, 1, 1, 0, 1, 1, 1, 4, 3,
       4, 3, 4, 2])

In [12]:
accuracy = accuracy_score(y_test, y_pred)

In [13]:
accuracy

0.8076923076923077

In [14]:
print(classification_report(y_test, y_pred, target_names=le.classes_))

                    precision    recall  f1-score   support

christiano_ronaldo       0.80      1.00      0.89         4
             messi       1.00      1.00      1.00         8
          mo_salah       1.00      0.75      0.86         4
    steven_gerrard       0.80      0.57      0.67         7
      wayne_rooney       0.40      0.67      0.50         3

          accuracy                           0.81        26
         macro avg       0.80      0.80      0.78        26
      weighted avg       0.85      0.81      0.81        26



In [15]:
print(confusion_matrix(y_test, y_pred))

[[4 0 0 0 0]
 [0 8 0 0 0]
 [1 0 3 0 0]
 [0 0 0 4 3]
 [0 0 0 1 2]]


From the evaluation, our model has an 80% accurracy which means it performs quite well. When looking at the classification report: Christiano ronaldo's, messi's and Mohammed Salah's image have a high f1-score which increases their likelihood of detection while Steven gerrard and Wayne Rooney's images have low f1-score which means their images might not get detected.

### Save the model

In [16]:
joblib.dump(model, "../models/svm_face_classifier.joblib")

['../models/svm_face_classifier.joblib']