### Import NumPy, PIL and the scikit-learn modules used to fetch the Olivetti faces dataset and to split it into training and test data.

In [156]:
import numpy as np
from PIL import Image
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from helpers import EigenfaceHelpers, negative_vector
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Define constants.

In [157]:
# Define constants
# Shape of a single face image as (rows, columns); the notebook describes the
# dataset images as 64x64.
IMG_SHAPE = (64, 64)

### Download the dataset, reshape the images into vectors and split it into training and test sets.
- We fetch the Olivetti dataset via sklearn
- `olivetti.images` holds the face images as 64x64 arrays; each image is flattened (raveled) into a vector of 4096 values
- `olivetti.target` contains the ids of the people shown in the corresponding rows of the X array
- We also pass the row indices to `train_test_split` so that we can track which person ends up at which position after the function shuffles the data; this is used later to determine whether the algorithm predicted the correct person

In [158]:
# Seed for the train/test shuffle. Without a fixed seed every run produces a
# different split, so the results further down could not be reproduced.
SPLIT_RANDOM_STATE = 42

# Download Olivetti faces dataset
olivetti = fetch_olivetti_faces()
y = olivetti.target

# Flatten every image into one row vector. The row length is inferred (-1) and
# the row count is taken from the data, instead of hard-coding (400, 4096).
X = olivetti.images.reshape(len(olivetti.images), -1)

# Original row positions, split alongside X and y so that each train/test
# sample can be traced back to its position in the full dataset.
indices = np.arange(len(X))

Xtrain, Xtest, ytrain, ytest, idx_train, idx_test = train_test_split(
    X, y, indices, random_state=SPLIT_RANDOM_STATE
)

### Construct the average face from the training set.
- Add all training vectors together and divide the sum by the number of images.

In [159]:
# The training split is the set of faces everything below is built from.
training_set = Xtrain
# Averaging over axis 0 (the samples) yields one mean value per pixel position.
avg_face = np.mean(training_set, axis=0)

### Derive normalized faces
- Subtract the average face from each of the faces in the training set

In [160]:
# Create the matrix A by subtracting the average face from every face in the
# training set. Broadcasting subtracts avg_face from each row in one step, so
# no per-row Python loop or temporary variables are needed.
centered_faces = training_set - avg_face

# Keep normalized_faces as a list of 1-D face vectors (one entry per training face).
normalized_faces = list(centered_faces)

# Convert normalized faces array to a matrix
normalized_faces_matrix = np.asmatrix(normalized_faces)

### Form the covariance matrix
- Transpose the matrix of normalized faces
- Multiply the normalized faces matrix with its transpose; the code below obtains the matrix from `np.cov`, which treats each row (one face) as a variable

In [161]:
# Turn the list of normalized faces into a single 2-D array, one face per row.
faces_array = np.array(normalized_faces)

# Transposed view of the same data: one face per column.
normalized_faces_t = faces_array.T

# Form the covariance matrix. By default np.cov treats every row as a variable,
# so the result is square with one row/column per training face.
cov_matrix = np.cov(faces_array)

### Calculate the eigenvalues and eigenvectors for the covariance matrix
- In order to determine the strongest eigenfaces, we select the eigenvectors with the highest corresponding eigenvalues
- Pair the eigenvalues/eigenvectors
- Sort the pairs based on the highest eigenvalues

In [162]:
# Calculate the eigenvalues/eigenvectors of the covariance matrix.
# A covariance matrix is symmetric, so use eigh: it always returns real
# eigenvalues (in ascending order), whereas the general-purpose eig is not
# guaranteed to return real values for such input.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Positions of the eigenvalues from largest to smallest. Sorting indices avoids
# sorting (value, vector) tuples, which compares the vectors themselves whenever
# two eigenvalues are equal and then raises ValueError.
descending_order = np.argsort(eigenvalues)[::-1]

eigvalues_sort = [eigenvalues[index] for index in descending_order]
# Column `index` of `eigenvectors` is the eigenvector belonging to eigenvalues[index].
eigvectors_sort = [eigenvectors[:, index] for index in descending_order]

# (eigenvalue, eigenvector) pairs, strongest first.
eig_pairs = list(zip(eigvalues_sort, eigvectors_sort))

### Select the 20 best eigenvectors

In [163]:
# Number of leading eigenvectors that are kept as eigenfaces.
N_EIGENFACES = 20

# Choose the N_EIGENFACES eigenvectors with the highest eigenvalues as the
# eigenfaces; after the transpose each column holds one eigenvector.
eigenfaces = np.array(eigvectors_sort[:N_EIGENFACES]).transpose()

### Create reduced eigenface space and calculate the weights for the projected vectors
- Project the eigenfaces onto the transposed training set by taking the dot product of the two
- Each weight vector is calculated as the dot product of the projections with one normalized face

In [164]:
# Dot product of the transposed training set with the selected eigenvectors,
# transposed afterwards so that each row of proj_data is one projected vector.
# NOTE(review): this uses the raw training_set rather than the mean-subtracted
# faces - confirm that this is intended.
proj_data = np.dot(training_set.T, eigenfaces).T

# Calculate weights for eigenfaces: one weight vector per normalized face,
# obtained by applying the projections to that face.
w = np.array([proj_data.dot(face) for face in np.array(normalized_faces)])

### Calculate distance between the weights of each eigenface and the test image
- Create the normalized unknown face
- Calculate the weights of the normalized unknown face with respect to the projections
- Create the difference vector, which is the weights of the eigenfaces subtracted from the weights of the test image
- Find the index of the lowest difference

In [165]:
%load_ext autoreload
%autoreload 2
from src.helpers import predictions

correct_ids = []
predicted_ids = []

# Get images from dataset and convert them to vectors
test_index = 20
# Store the correct ids and the predicted ids in corresponding indices
correct_ids_multi, predicted_ids_multi = predictions(Xtest, y, idx_train, idx_test, avg_face, proj_data, w, "multi", sample_size=10, threshold=5)

correct_ids, predicted_ids = predictions(Xtest, y, idx_train, idx_test, avg_face, proj_data, w)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Counter({13: 4, 24: 2, 12: 1, 18: 1, 22: 1, 27: 1})
Counter({10: 3, 7: 2, 18: 2, 3: 1, 9: 1, 22: 1})
Counter({5: 5, 0: 2, 6: 1, 17: 1, 22: 1})
Counter({12: 3, 39: 2, 24: 1, 8: 1, 22: 1, 20: 1, 19: 1})
Counter({18: 2, 25: 2, 36: 2, 13: 1, 16: 1, 37: 1, 4: 1})
Counter({1: 6, 3: 2, 24: 1, 13: 1})
Counter({39: 2, 12: 2, 24: 1, 4: 1, 14: 1, 8: 1, 22: 1, 16: 1})
Counter({1: 6, 14: 2, 3: 1, 24: 1})
Counter({10: 6, 31: 2, 26: 1, 18: 1})
Counter({4: 3, 8: 2, 39: 2, 22: 2, 12: 1})
Counter({36: 4, 30: 4, 37: 1, 28: 1})
Counter({17: 3, 15: 3, 0: 1, 3: 1, 6: 1, 9: 1})
Counter({18: 4, 10: 3, 31: 3})
Counter({25: 3, 9: 1, 13: 1, 16: 1, 29: 1, 24: 1, 2: 1, 4: 1})
Counter({25: 3, 27: 2, 22: 1, 37: 1, 2: 1, 13: 1, 16: 1})
Counter({29: 5, 34: 2, 30: 1, 20: 1, 38: 1})
Counter({26: 4, 24: 2, 3: 2, 27: 1, 10: 1})
Counter({25: 3, 11: 2, 13: 1, 22: 1, 37: 1, 18: 1, 24: 1})
Counter({17: 2, 39: 2, 6: 1, 22: 1, 0: 1, 35: 1, 4

### Print results

In [166]:
from sklearn.metrics import classification_report

# Print the report for the default run first, then the one for the "multi" run.
for true_ids, guessed_ids in (
    (correct_ids, predicted_ids),
    (correct_ids_multi, predicted_ids_multi),
):
    print(classification_report(true_ids, guessed_ids, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      0.20      0.33         5
           1       1.00      1.00      1.00         1
           2       1.00      0.75      0.86         4
           3       0.43      1.00      0.60         3
           4       0.00      0.00      0.00         2
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         2
           7       0.75      0.75      0.75         4
           8       1.00      1.00      1.00         5
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         3
          11       1.00      0.67      0.80         3
          12       0.00      0.00      0.00         2
          13       1.00      1.00      1.00         4
          14       1.00      1.00      1.00         3
          15       0.00      0.00      0.00         1
          16       0.50      1.00      0.67         2
          17       1.00    