### Let's import numpy, PIL and some sklearn modules for the popular olivetti dataset and splitting training data.

In [1369]:
import numpy as np
from PIL import Image
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from helpers import EigenfaceHelpers, negative_vector
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Define constants.

In [1370]:
# Define constants
IMG_SHAPE = (64, 64)

### Download the dataset, reshape the images into vectors and split it into pieces for training.
- We fetch the olivetti dataset via sklearn
- Olivetti.images is a collection of vectors, raveled 64x64 sized images
- olivetti.target contains the id's of the people in the X array in the corresponding indices
- We give the indices to the train_test_split to track which person is in which index after the function shuffles them, this will later be used to determine whether the algorithm predicted the correct person

In [1371]:
# Download Olivetti faces dataset
olivetti = fetch_olivetti_faces()
X = olivetti.images
y = olivetti.target

# Print info on shapes and reshape where necessary
X = X.reshape((400, 4096))
indices = np.arange(len(X))
Xtrain, Xtest, ytrain, ytest, idx_train, idx_test = train_test_split(X, y, indices)

### Construct the average face from the training set.
- Add all training vectors together and divide the sum by the number of images.

In [1372]:
training_set = Xtrain
# Average face using numpy
avg_face = training_set.mean(axis=0)

### Derive normalized faces
- Subtract the average face from each of the faces in the training set

In [1373]:
# Let's create the matrix A by subtracting the average face from each face in the training set
normalized_faces = []
neg_avg_face = negative_vector(avg_face)
sub = None
for v in training_set:
    sub = np.subtract(v, avg_face)
    normalized_faces.append(sub)

# Convert normalized faces array to a matrix
normalized_faces_matrix = np.asmatrix(normalized_faces)

### Form the covariance matrix
- Transpose the matrix of normalized faces
- Multiply the normalized faces matrix with its transposition

In [1374]:
# Form the covariance matrix
normalized_faces_t = np.array(normalized_faces).transpose()

# cov_matrix = (normalized_faces_matrix)(normalized_faces_t)
cov_matrix = np.cov(np.array(normalized_faces))

### Calculate the eigenvalues and eigenvectors for the coavariance matrix
- In order to determine the strongest eigenfaces, we select the eigenvectors with the highest corresponding eigenvalues
- Pair the eigenvalues/eigenvectors
- Sort the pairs based on the highest eigenvalues

In [1375]:
# Calculate the eigenvectors of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
eig_pairs = [(eigenvalues[index], eigenvectors[:, index]) for index in range(len(eigenvalues))]

eig_pairs.sort(reverse=True)
eigvalues_sort = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigvectors_sort = [eig_pairs[index][1] for index in range(len(eigenvalues))]

### Select the 20 best eigenvectors

In [1376]:
# Choose the 10 eigenvectors with the highest eigenvalues as the eigenfaces
eigenfaces = np.array(eigvectors_sort[:30]).transpose()

### Create reduced eigenface space and calculate the weights for the projected vectors
- Project the eigenfaces to the training_sets transposition by performing a dot product between the two
- A weight is calculated by performing a dot product between each normalized face and the projections

In [1377]:
proj_data = np.dot(training_set.transpose(), eigenfaces)
proj_data = proj_data.transpose()

# Calculate weights for eigenfaces
w = np.array([np.dot(proj_data, i) for i in np.array(normalized_faces)])

### Calculate distance between the weights of each eigenface and the test image
- Create the normalized unknown face
- Calculate the weights of the normalized unknown weights in respect to the projections
- Create the difference vector, which is the weights of the eigenfaces subracted from the weights of the test image
- Find the index of the lowest difference

In [1378]:
%load_ext autoreload
%autoreload 2
from src.helpers import predict

correct_ids = []
predicted_ids = []

# Get images from dataset and convert them to vectors
test_index = 20
# Store the correct ids and the predicted ids in corresponding indices
correct_ids_multi, predicted_ids_multi = predict(Xtest, y, idx_train, idx_test, avg_face, proj_data, w, type="", sample_size=3, threshold=2)

correct_ids, predicted_ids = predict(Xtest, y, idx_train, idx_test, avg_face, proj_data, w)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Print results

In [1379]:
from sklearn.metrics import classification_report
#print(classification_report(correct_ids, predicted_ids, zero_division=0))
print(classification_report(correct_ids_multi, predicted_ids_multi, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2
           2       0.67      1.00      0.80         4
           3       0.00      0.00      0.00         0
           5       1.00      1.00      1.00         3
           6       1.00      1.00      1.00         3
           7       1.00      0.75      0.86         4
           8       0.00      0.00      0.00         0
           9       1.00      0.50      0.67         4
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         3
          12       0.67      0.67      0.67         3
          13       1.00      1.00      1.00         3
          14       0.33      1.00      0.50         1
          15       1.00      1.00      1.00         3
          16       0.50      1.00      0.67         1
          17       1.00      1.00      1.00         2
          18       1.00    