### Let's import numpy, PIL and some sklearn modules for the popular olivetti dataset and splitting training data.

In [1]:
import numpy as np
from PIL import Image
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from helpers import EigenfaceHelpers, negative_vector

### Define constants.

In [2]:
# Define constants
IMG_SHAPE = (64, 64)

### Download the dataset, reshape the images into vectors and split it into pieces for training.
- We fetch the olivetti dataset via sklearn
- Olivetti.images is a collection of vectors, raveled 64x64 sized images
- olivetti.target contains the id's of the people in the X array in the corresponding indices
- We give the indices to the train_test_split to track which person is in which index after the function shuffles them, this will later be used to determine whether the algorithm predicted the correct person

In [3]:
# Download Olivetti faces dataset
olivetti = fetch_olivetti_faces()
X = olivetti.images
y = olivetti.target

# Print info on shapes and reshape where necessary
X = X.reshape((400, 4096))
indices = np.arange(len(X))
Xtrain, Xtest, ytrain, ytest, idx_train, idx_test = train_test_split(X, y, indices)

### Construct the average face from the training set.
- Add all training vectors together and divide the sum by the number of images.

In [4]:
training_set = Xtrain
# Average face using numpy
avg_face = training_set.mean(axis=0)

### Derive normalized faces
- Subtract the average face from each of the faces in the training set

In [5]:
# Let's create the matrix A by subtracting the average face from each face in the training set
normalized_faces = []
neg_avg_face = negative_vector(avg_face)
sub = None
for v in training_set:
    sub = np.subtract(v, avg_face)
    normalized_faces.append(sub)

# Convert normalized faces array to a matrix
normalized_faces_matrix = np.asmatrix(normalized_faces)

### Form the covariance matrix
- Transpose the matrix of normalized faces
- Multiply the normalized faces matrix with its transposition

In [6]:
# Form the covariance matrix
normalized_faces_t = np.array(normalized_faces).transpose()

# cov_matrix = (normalized_faces_matrix)(normalized_faces_t)
cov_matrix = np.cov(np.array(normalized_faces))

### Calculate the eigenvalues and eigenvectors for the coavariance matrix
- In order to determine the strongest eigenfaces, we select the eigenvectors with the highest corresponding eigenvalues
- Pair the eigenvalues/eigenvectors
- Sort the pairs based on the highest eigenvalues

In [7]:
# Calculate the eigenvectors of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
eig_pairs = [(eigenvalues[index], eigenvectors[:, index]) for index in range(len(eigenvalues))]

eig_pairs.sort(reverse=True)
eigvalues_sort = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigvectors_sort = [eig_pairs[index][1] for index in range(len(eigenvalues))]

### Select the 20 best eigenvectors

In [8]:
# Choose the 10 eigenvectors with the highest eigenvalues as the eigenfaces
eigenfaces = np.array(eigvectors_sort[:20]).transpose()

### Create reduced eigenface space and calculate the weights for the projected vectors
- Project the eigenfaces to the training_sets transposition by performing a dot product between the two
- A weight is calculated by performing a dot product between each normalized face and the projections

In [9]:
proj_data = np.dot(training_set.transpose(), eigenfaces)
proj_data = proj_data.transpose()

# Calculate weights for eigenfaces
w = np.array([np.dot(proj_data, i) for i in np.array(normalized_faces)])

### Calculate distance between the weights of each eigenface and the test image
- Create the normalized unknown face
- Calculate the weights of the normalized unknown weights in respect to the projections
- Create the difference vector, which is the weights of the eigenfaces subracted from the weights of the test image
- Find the index of the lowest difference

In [10]:


from src.helpers import predictions

correct_ids = []
predicted_ids = []

# Get images from dataset and convert them to vectors
test_index = 20
# Store the correct ids and the predicted ids in corresponding indices
correct_ids_multi, predicted_ids_multi = predictions(Xtest, y, idx_train, idx_test, avg_face, proj_data, w, "multi", multi_n=200)

correct_ids, predicted_ids = predictions(Xtest, y, idx_train, idx_test, avg_face, proj_data, w, "")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104


### Print results

In [11]:
from sklearn.metrics import classification_report
print(classification_report(correct_ids, predicted_ids, zero_division=0))
print(classification_report(correct_ids_multi, predicted_ids_multi, zero_division=0))

              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      1.00      1.00         1
           2       0.67      1.00      0.80         2
           3       1.00      0.33      0.50         3
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1
           6       0.50      1.00      0.67         2
           7       1.00      0.50      0.67         2
           8       1.00      1.00      1.00         1
           9       1.00      0.50      0.67         2
          10       1.00      1.00      1.00         3
          11       1.00      0.67      0.80         3
          12       0.00      0.00      0.00         0
          13       1.00      1.00      1.00         1
          14       0.71      1.00      0.83         5
          15       1.00      0.33      0.50         3
          16       0.60      1.00      0.75         3
          17       0.75    