In [1]:
from glob import glob
import os
import numpy as np
import cv2
from time import time
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [3]:

# Specify the path to your dataset
dataset_path = "./training"

# Read images and labels from the dataset
images = []
labels = []

# Define the desired dimensions for your images
desired_width = 100
desired_height = 100
print(images)
print(labels)

[]
[]


In [4]:
for person_id, person_folder in enumerate(sorted(glob(os.path.join(dataset_path, "*")))):
    person_name = os.path.basename(person_folder)
    for image_path in glob(os.path.join(person_folder, "*.jpg")):
        # Read the image using OpenCV
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        # Resize the image to the desired dimensions
        image = cv2.resize(image, (desired_width, desired_height))
        # Flatten the image and add it to the list
        images.append(image.flatten())
        # Extract the person's name from the filename and add it to the labels
        person_name_in_filename = os.path.splitext(os.path.basename(image_path))[0].split("_")[0]
        labels.append(person_name_in_filename)


print(images)
print(labels)

[array([2, 2, 2, ..., 1, 1, 1], dtype=uint8), array([ 2,  0,  0, ..., 31, 52, 68], dtype=uint8), array([68, 86, 81, ..., 34, 22, 14], dtype=uint8), array([ 21,  14,  10, ..., 223, 223, 223], dtype=uint8), array([51, 44, 52, ..., 64, 71, 87], dtype=uint8), array([153, 156, 153, ...,  98,  86,  77], dtype=uint8), array([64, 72, 72, ..., 81, 61, 59], dtype=uint8), array([62, 73, 80, ..., 41, 41, 39], dtype=uint8), array([ 51,  69,  64, ..., 159, 160, 159], dtype=uint8), array([178, 179, 181, ..., 190, 189, 188], dtype=uint8), array([253, 253, 253, ...,  45,  45,  44], dtype=uint8), array([39, 44, 35, ..., 25, 21, 22], dtype=uint8), array([60, 57, 58, ..., 38, 45, 47], dtype=uint8), array([ 41,  40,  42, ..., 145, 127, 109], dtype=uint8), array([161, 153, 148, ...,  42,  40,  40], dtype=uint8), array([227, 228, 229, ...,  72,  20,  27], dtype=uint8), array([ 54,  50,  51, ..., 229, 233, 238], dtype=uint8), array([210, 210, 210, ...,  33,  33,  33], dtype=uint8), array([ 97,  96,  95, ..., 

In [6]:

# Convert the lists to numpy arrays
X = np.array(images)
y = np.array(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train,X_test,y_train,y_test)

[[144 143 129 ...  20  29  35]
 [ 86  87  88 ... 226 227 228]
 [ 25  25  26 ... 166 172 170]
 ...
 [ 52  51  51 ...  28  28  28]
 [101 102 100 ... 232 236 230]
 [  0   0   0 ...   0   0   0]] [[ 55  56  59 ...  43  28  23]
 [ 59  72  77 ... 222 221 221]
 [114 121 136 ... 106  86  74]
 ...
 [ 12  12  12 ...   1   1   2]
 [119 125 123 ... 122 121 119]
 [ 78  75  74 ... 230 229 213]] ['bald' 'devillers' 'ronaldo' 'adityaroy' 'rock' 'ronaldo' 'bald'
 'ronaldo' 'devillers' 'devillers' 'ronaldo' 'adityaroy' 'ronaldo'
 'adityaroy' 'bald' 'ronaldo' 'bald' 'ronaldo' 'ronaldo' 'adityaroy'
 'devillers' 'modi' 'ronaldo' 'adityaroy' 'adityaroy' 'adityaroy' 'rock'
 'bald' 'adityaroy' 'bald' 'bald' 'rock' 'ronaldo' 'ronaldo' 'bald' 'rock'
 'devillers' 'adityaroy' 'devillers' 'modi' 'ronaldo' 'adityaroy' 'bald'
 'adityaroy' 'adityaroy' 'ronaldo' 'rock' 'adityaroy' 'ronaldo'
 'adityaroy' 'bald' 'modi' 'modi' 'adityaroy' 'modi' 'devillers'
 'adityaroy' 'adityaroy' 'ronaldo' 'bald' 'adityaroy' 'bald' 'ro

In [7]:
# Compute PCA (eigenfaces) on the face dataset: unsupervised feature extraction

n_components = 150

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_train.shape[0]))
t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, desired_height, desired_width))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

Extracting the top 150 eigenfaces from 384 faces
done in 0.904s
Projecting the input data on the eigenfaces orthonormal basis
done in 0.045s


In [8]:
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
clf = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced'), param_grid
)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)


Fitting the classifier to the training set
done in 4.391s
Best estimator found by grid search:
SVC(C=1000.0, class_weight='balanced', gamma=0.005)


In [9]:
# Finding the prediction time of Eigenfaces

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

Predicting people's names on the test set
done in 0.017s


In [10]:
# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=np.unique(y)))

# Print confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred, labels=np.unique(y))
print("Confusion Matrix:")
print(conf_matrix)

Classification Report:
              precision    recall  f1-score   support

   adityaroy       1.00      0.64      0.78        22
        bald       0.53      0.83      0.65        12
   devillers       0.50      0.54      0.52        13
        modi       0.71      0.71      0.71        14
        rock       0.58      0.69      0.63        16
     ronaldo       0.81      0.68      0.74        19

    accuracy                           0.68        96
   macro avg       0.69      0.68      0.67        96
weighted avg       0.72      0.68      0.69        96

Confusion Matrix:
[[14  1  2  0  4  1]
 [ 0 10  0  1  1  0]
 [ 0  2  7  1  2  1]
 [ 0  3  1 10  0  0]
 [ 0  2  0  2 11  1]
 [ 0  1  4  0  1 13]]


In [11]:
# Calculate and print accuracy, precision, recall, and F1-score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Accuracy: 0.68
Precision: 0.72
Recall: 0.68
F1 Score: 0.69
