In [311]:
import numpy as np
from PIL import Image
import glob
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split

In [312]:
# Define constants
IMG_SHAPE = (64, 64)
# Download Olivetti faces dataset
olivetti = fetch_olivetti_faces()
X = olivetti.images
y = olivetti.target
# Print info on shapes and reshape where necessary
X = X.reshape((400, 4096))
indices = np.arange(len(X))
Xtrain, Xtest, ytrain, ytest, idx_train, idx_test = train_test_split(X, y, indices)
print(ytrain)
print(len(idx_train), len(Xtrain))

[16 11  5  8 30 21 12 21 37 34 32 30  7  3 30 38 35 21  9 22 34 14  0 34
 14 31 15  9 36 14 24 38  3  2 29 10 31 30 29 19 13  2 37 14 33 36 13 19
 15 27 31 37 37 29  9 31 38 17 26 39 22 11 25  0 36 38 23 26  0 18 16 28
 25  9 14 23 27 24 19  0  0  8 38 17  1 15 35 30  8 18  1 22 36  2 28 15
  2  5 39 28 26 12 33 27  1 20 36 23 14  9 18 16 19 37 37 12 11 18  4  4
 15 35 16  7 30  6  7 19 10 22 16  8 20 26  7  0  6  4 25 13 24 37 19 24
  9 16 35 16 35 13  8 32 12 38 16 18 31 15  0  9 22 35 21  4  8 19  4 20
 17 10 29  7  6  7 18 22 21 31 11 29 26 34 12 11 18 25 14 19  2 39  6  4
 22 29 17 29  4 13 14 34 22  2  6 31 39 36  0 26 32 12 26 39 34 26  7 19
 10 29 20 11  3 17 23 10 34 20 28 25 32 18 34  4 29  6  5 32 18 20 23 10
  3  9  1  1 10  2 31  7 14  5 28  3 27  6 39 33 38 15 33  4 35 25  8 15
  3  5 28 39 28 31 33 38 11  6 24  1  3 23 27 25 28  7  5 32 28 15 21 18
 33 33 38 33 34 24  1  9 33 35 16 20]
300 300


In [313]:
def crop_image(img):
    width, height = img.size

    new_width = IMG_SHAPE[0]
    new_height = IMG_SHAPE[1]
    left = (width - new_width)/2 - .5
    top = (height - new_height)/2 - .5
    right = (width + new_width)/2 - .5
    bottom = (height + new_height)/2 - .5

    # Crop the center of the image
    im = img.crop((left, top, right, bottom))

    return im

def img_to_vector(path_to_img):
    # Load the image
    img = Image.open(path_to_img).convert('L')
    img = crop_image(img)

    # Convert the image into n²*1 array
    arr = np.array(img)
    flat_array = arr.ravel()

    return flat_array


def vector_to_img(vector, shape = IMG_SHAPE):
    array = vector.reshape(shape)
    img = Image.fromarray(array)
    return img

def sum_of_vectors(arr: []):
    sum_vector = np.zeros(len(arr[0]))
    for i in range(0, len(arr[0])):
        for v in arr:
            sum_vector[i] += v[i]

    return sum_vector


def scalar_multiply_vector(scalar, v):
    arr = np.array([])
    for x in v:
        arr = np.append(arr, x * scalar)
    return arr


def negative_vector(v):
    new_v = []
    for x in v:
        new_v.append(-x)
    return np.array(new_v)

In [314]:
all_imgs = []
training_set_photo_names = []

# Create an np.array from the vectors
training_set = Xtrain

In [315]:
# Average face using numpy
avg_face = training_set.mean(axis=0)

In [316]:
# Let's create the matrix A by subtracting the average face from each face in the training set
A = []
neg_avg_face = negative_vector(avg_face)
sub = None
for v in training_set:
    sub = np.subtract(v, avg_face)
    A.append(sub)

# Convert A to a matrix
A_m = np.asmatrix(A)

A_t = np.array(A).transpose()

In [317]:
# Form the covariance matrix
cov_matrix = np.cov(np.array(A))

In [318]:
# Calculate the eigenvectors of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
eig_pairs = [(eigenvalues[index], eigenvectors[:,index]) for index in range(len(eigenvalues))]

eig_pairs.sort(reverse=True)
eigvalues_sort  = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigvectors_sort = [eig_pairs[index][1] for index in range(len(eigenvalues))]

In [319]:
# Choose the 10 eigenvectors with the highest eigenvalues as the eigenfaces
eigenfaces = np.array(eigvectors_sort[:20]).transpose()

In [320]:
# Calculate eiganfaces
# Create reduced eigenface space
proj_data = np.dot(training_set.transpose(), eigenfaces)
proj_data = proj_data.transpose()
# Calculate weights for eigenfaces
w = np.array([np.dot(proj_data,i) for i in np.array(A)])

In [333]:
from sklearn.metrics import classification_report

correct_ids = []
predicted_ids = []

# Get images from dataset and convert them to vectors
test_index = 20
for test_index in range(len(Xtest)):
    unknown_face_vector = Xtest[test_index]
    mean_unknown_face = np.subtract(unknown_face_vector, avg_face)
    w_unknown = np.dot(proj_data, mean_unknown_face)
    difference_vector = w - w_unknown
    norms = np.linalg.norm(difference_vector, axis=1)
    index = np.argmin(norms)

    correct_ids.append(y[idx_test[test_index]])
    predicted_ids.append(y[idx_train[index]])

In [334]:
print(classification_report(correct_ids, predicted_ids, zero_division=0))

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       1.00      1.00      1.00         3
           2       0.38      1.00      0.55         3
           3       1.00      0.67      0.80         3
           4       0.33      1.00      0.50         1
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         2
           7       0.00      0.00      0.00         1
           8       0.75      1.00      0.86         3
           9       1.00      1.00      1.00         1
          10       1.00      1.00      1.00         3
          11       1.00      0.67      0.80         3
          12       0.67      0.50      0.57         4
          13       1.00      1.00      1.00         5
          14       1.00      1.00      1.00         1
          15       1.00      1.00      1.00         1
          16       0.50      1.00      0.67         1
          17       1.00    