In [12]:
from glob import glob
import os
import numpy as np
import cv2
from time import time
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [13]:

# Specify the path to your dataset
dataset_path = "./training"

# Read images and labels from the dataset
images = []
labels = []

# Define the desired dimensions for your images
desired_width = 100
desired_height = 100
print(images)
print(labels)

[]
[]


In [14]:
for person_id, person_folder in enumerate(sorted(glob(os.path.join(dataset_path, "*")))):
    person_name = os.path.basename(person_folder)
    for image_path in glob(os.path.join(person_folder, "*.jpg")):
        # Read the image using OpenCV
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        # Resize the image to the desired dimensions
        image = cv2.resize(image, (desired_width, desired_height))
        # Flatten the image and add it to the list
        images.append(image.flatten())
        # Extract the person's name from the filename and add it to the labels
        person_name_in_filename = os.path.splitext(os.path.basename(image_path))[0].split("_")[0]
        labels.append(person_name_in_filename)


In [15]:

# Converting the lists into numpy arrays
X = np.array(images)
y = np.array(labels)

# Splitting the dataset into training dataset and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [16]:
# Compute PCA (eigenfaces) on the face dataset: unsupervised feature extraction

n_components = 150

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_train.shape[0]))
t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, desired_height, desired_width))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

Extracting the top 150 eigenfaces from 384 faces
done in 1.404s
Projecting the input data on the eigenfaces orthonormal basis
done in 0.286s


In [17]:
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
clf = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced'), param_grid
)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)


Fitting the classifier to the training set
done in 3.886s
Best estimator found by grid search:
SVC(C=1000.0, class_weight='balanced', gamma=0.005)


In [23]:
# Finding the prediction time of Eigenfaces

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

Predicting people's names on the test set
done in 0.004s


In [24]:
# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=np.unique(y)))

# Print confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred, labels=np.unique(y))
print("Confusion Matrix:")
print(conf_matrix)

Classification Report:
              precision    recall  f1-score   support

   adityaroy       1.00      0.64      0.78        22
        bald       0.59      0.83      0.69        12
   devillers       0.53      0.62      0.57        13
        modi       0.75      0.86      0.80        14
        rock       0.59      0.62      0.61        16
     ronaldo       0.82      0.74      0.78        19

    accuracy                           0.71        96
   macro avg       0.71      0.72      0.70        96
weighted avg       0.75      0.71      0.71        96

Confusion Matrix:
[[14  1  2  0  4  1]
 [ 0 10  0  2  0  0]
 [ 0  2  8  0  2  1]
 [ 0  1  1 12  0  0]
 [ 0  2  1  2 10  1]
 [ 0  1  3  0  1 14]]


In [25]:
# Calculate and print accuracy, precision, recall, and F1-score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Accuracy: 0.71
Precision: 0.75
Recall: 0.71
F1 Score: 0.71
