Let's import numpy, PIL and some sklearn modules for the popular olivetti dataset and splitting training data

In [45]:
import numpy as np
from PIL import Image
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from helpers import EigenfaceHelpers, negative_vector

Define constants

In [46]:
# Define constants
IMG_SHAPE = (64, 64)

Download the dataset, reshape the images into vectors and split it into pieces for training.

In [47]:
# Download Olivetti faces dataset
olivetti = fetch_olivetti_faces()
X = olivetti.images
y = olivetti.target
# Print info on shapes and reshape where necessary
X = X.reshape((400, 4096))
indices = np.arange(len(X))
Xtrain, Xtest, ytrain, ytest, idx_train, idx_test = train_test_split(X, y, indices)

Define methods

In [48]:
all_imgs = []
training_set_photo_names = []

# Create an np.array from the vectors
training_set = Xtrain

In [49]:
# Average face using numpy
avg_face = training_set.mean(axis=0)

In [50]:
# Let's create the matrix A by subtracting the average face from each face in the training set
A = []
neg_avg_face = negative_vector(avg_face)
sub = None
for v in training_set:
    sub = np.subtract(v, avg_face)
    A.append(sub)

# Convert A to a matrix
A_m = np.asmatrix(A)

A_t = np.array(A).transpose()

In [51]:
# Form the covariance matrix
cov_matrix = np.cov(np.array(A))

In [52]:
# Calculate the eigenvectors of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
eig_pairs = [(eigenvalues[index], eigenvectors[:, index]) for index in range(len(eigenvalues))]

eig_pairs.sort(reverse=True)
eigvalues_sort = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigvectors_sort = [eig_pairs[index][1] for index in range(len(eigenvalues))]

In [53]:
# Choose the 10 eigenvectors with the highest eigenvalues as the eigenfaces
eigenfaces = np.array(eigvectors_sort[:20]).transpose()

In [54]:
# Calculate eiganfaces
# Create reduced eigenface space
proj_data = np.dot(training_set.transpose(), eigenfaces)
proj_data = proj_data.transpose()
# Calculate weights for eigenfaces
w = np.array([np.dot(proj_data, i) for i in np.array(A)])

In [55]:
from sklearn.metrics import classification_report

correct_ids = []
predicted_ids = []

# Get images from dataset and convert them to vectors
test_index = 20
for test_index in range(len(Xtest)):
    unknown_face_vector = Xtest[test_index]
    mean_unknown_face = np.subtract(unknown_face_vector, avg_face)
    w_unknown = np.dot(proj_data, mean_unknown_face)
    difference_vector = w - w_unknown
    norms = np.linalg.norm(difference_vector, axis=1)
    index = np.argmin(norms)

    correct_ids.append(y[idx_test[test_index]])
    predicted_ids.append(y[idx_train[index]])

In [56]:
print(classification_report(correct_ids, predicted_ids, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      1.00      1.00         2
           2       0.60      0.75      0.67         4
           3       1.00      1.00      1.00         3
           4       1.00      0.50      0.67         2
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         2
           8       1.00      0.40      0.57         5
           9       1.00      0.25      0.40         4
          10       1.00      1.00      1.00         2
          11       0.40      1.00      0.57         2
          12       0.50      0.14      0.22         7
          13       1.00      1.00      1.00         4
          14       0.75      1.00      0.86         3
          15       1.00      1.00      1.00         3
          17       0.00      0.00      0.00         0
          18       1.00    