In [1]:
# Loading and displaying an image
# from PIL import Image
# img = Image.open('/home/dvveera/db/orl/s1/1.pgm')
# print(img.format, img.size, img.mode)
# display(img)

In [2]:
# Load the dataset
import load_dataset
import numpy as np
from PIL import Image

dir_src = '/home/dvveera/dvveera_hdd2tb/orl'
data_label = load_dataset.load_data_label(dir_src)

# Training, validation and test data with labels
# data_label = list(train, valid, test, mean_train)
train_storage, train_labels = zip(*data_label[0])
valid_storage, valid_labels = zip(*data_label[1])
test_storage, test_labels = zip(*data_label[2])
mean_storage = data_label[3]

Assigned Labels to Training, Validation and Test Data
Train data: 20/240
Train data: 40/240
Train data: 60/240
Train data: 80/240
Train data: 100/240
Train data: 120/240
Train data: 140/240
Train data: 160/240
Train data: 180/240
Train data: 200/240
Train data: 220/240
Valid data: 20/80
Valid data: 40/80
Valid data: 60/80
Test data: 20/80
Test data: 40/80
Test data: 60/80


In [3]:
%%time
# Linear Discriminant Analysis
from collections import Counter
# Converting images to a one-dimensional vector
train_1d = [t.flatten() for t in train_storage]
# Calculate the mean of images in each class
label_dict = Counter(train_labels)
mean = {}
for idx, t in enumerate(train_1d):
    lab = train_labels[idx]
    if lab not in mean:
        mean[lab] = t / label_dict[lab]
    else:
        mean[lab] += t / label_dict[lab]
print('Calculated the means of the images in each class and the total mean of all images')
# print(mean)
# Calculate the mean of all images
mean_all = np.sum(train_1d, axis = 0) / len(train_1d)
# print(mean_all)

Calculated the means of the images in each class and the total mean of all images
CPU times: user 6 ms, sys: 4.26 ms, total: 10.3 ms
Wall time: 9.66 ms


In [4]:
%%time
# Mean-centering the images in each class
train_center = []
for idx, t in enumerate(train_1d):
    lab = train_labels[idx]
    train_center.append(t - mean[lab])
# print(train_center)
print('Centered the images in each class')

Centered the images in each class
CPU times: user 4.23 ms, sys: 4.15 ms, total: 8.38 ms
Wall time: 7.74 ms


In [5]:
%%time
# Center the class means
mean_center = []
for idx, m in enumerate(mean):
    mean_center.append(mean[m] - mean_all)
# print(mean_center)
print('Centered the class means')

Centered the class means
CPU times: user 2.29 ms, sys: 0 ns, total: 2.29 ms
Wall time: 1.68 ms


In [6]:
%%time
# Create data matrix
data_matrix = np.transpose(train_center)
# print(data_matrix)
print('Created the data matrix')

Created the data matrix
CPU times: user 153 µs, sys: 7.48 ms, total: 7.64 ms
Wall time: 6.93 ms


In [None]:
# Find orthonormal basis for the data matrix
# U = np.linalg.qr(data_matrix)
# print(U)

In [7]:
%%time
# Find orthonormal basis for the data matrix
# Create covariance matrix
covariance = np.dot(data_matrix, np.transpose(data_matrix))
# Compute eigenvalues and eigenvectors of covariance matrix
eig_values, eig_vectors = np.linalg.eigh(covariance)
# print(eig_values)

CPU times: user 3min 39s, sys: 1.19 s, total: 3min 40s
Wall time: 55.6 s


In [8]:
%%time
# Project centered images into orthonormal basis
proj_train_center = np.dot(np.transpose(eig_vectors), data_matrix)
# Project centered means into orthonormal basis
proj_mean_center = np.dot(np.transpose(eig_vectors), np.transpose(mean_center))
print('Projected centered images and centered means into the orthonormal basis')
eig_values_ortho = eig_values
eig_vectors_ortho = eig_vectors

Projected centered images and centered means into the orthonormal basis
CPU times: user 1.48 s, sys: 3.92 ms, total: 1.48 s
Wall time: 376 ms


In [10]:
%%time
# Calculate within class scatter matrix
Sw = []
for idx, t in enumerate(np.transpose(proj_train_center)):
    if idx == 0:
        Sw = np.outer(t, t)
    else:
        Sw += np.outer(t, t)
print('Calculated the within-class scatter matrix')
#print(Sw)

Calculated the within-class scatter matrix
CPU times: user 36.1 s, sys: 39.2 s, total: 1min 15s
Wall time: 1min 15s


In [11]:
%%time
# Calculate between class scatter matrix
Sb = []
tr = np.transpose(proj_mean_center)
for idx, m in enumerate(mean):
    if idx == 0:
        Sb = mean[m] * np.outer(tr[idx], tr[idx])
    else:
        Sb += mean[m] * np.outer(tr[idx], tr[idx])
print('Calculated the between-class scatter matrix')
#print(Sb)

Calculated the between-class scatter matrix
CPU times: user 9.28 s, sys: 12.9 s, total: 22.2 s
Wall time: 22.2 s


In [12]:
%%time
# Compute the generalized eigen values and eigen vectors
A = np.dot(np.linalg.inv(Sw), Sb)
eig_values, eig_vectors = np.linalg.eigh(A)
# print(eig_values)

CPU times: user 5min 23s, sys: 1.96 s, total: 5min 25s
Wall time: 1min 21s


In [13]:
%%time
# Sort eigenvalues and eigenvectors
eig = list(zip(eig_values, np.transpose(eig_vectors)))
sorted(eig, key=lambda x: x[0], reverse = True)
print('Sorted eigen values and eigen vectors from high to low')
# print(eig)

Sorted eigen values and eigen vectors from high to low
CPU times: user 3.1 ms, sys: 0 ns, total: 3.1 ms
Wall time: 3 ms


In [14]:
%%time
# Select the first C - 1 eigen vectors
new_eig = []
for idx, e in enumerate(eig):
    if idx < (len(label_dict) - 1):
        new_eig.append(e)
eig_values, eig_vectors = zip(*new_eig)
print('Selected the first C - 1 eigenvectors')
# print(new_eig)

Selected the first C - 1 eigenvectors
CPU times: user 3.1 ms, sys: 0 ns, total: 3.1 ms
Wall time: 2.67 ms


In [15]:
%%time
# Project training images into orthonormal basis
proj_data_ortho = np.dot(np.transpose(eig_vectors_ortho), np.transpose(train_1d))
# Project the orthonormal projected images onto Fisher basis vectors
proj_data_matrix = np.dot(eig_vectors, proj_data_ortho)

CPU times: user 1.18 s, sys: 6.73 ms, total: 1.19 s
Wall time: 305 ms


In [16]:
%%time
# Identify test images
valid_1d = [t.flatten() for t in valid_storage]
# Create data matrix
valid_data_matrix = np.transpose(valid_1d)
# Project test images into the same subspace as training images
proj_valid_ortho = np.dot(np.transpose(eig_vectors_ortho), valid_data_matrix)
proj_valid_matrix = np.dot(eig_vectors, proj_valid_ortho)

CPU times: user 521 ms, sys: 5.06 ms, total: 526 ms
Wall time: 135 ms


In [17]:
%%time
# Calculate L2 norms
tr1 = np.transpose(proj_valid_matrix)
tr2 = np.transpose(proj_data_matrix)
l2 = []
for a in tr1:
    l1 = [np.linalg.norm(a - b) for b in tr2]
    l2.append(l1)

CPU times: user 83 ms, sys: 19.4 ms, total: 102 ms
Wall time: 75 ms


In [18]:
%%time
# Find the index of the training image that matches closely with the test image
idx = [l.index(min(l)) for l in l2]

CPU times: user 460 µs, sys: 69 µs, total: 529 µs
Wall time: 531 µs


In [19]:
%%time
# Find the label for the test images
out = []
for i in idx:
    out.append(train_labels[i])
# print(list(zip(out, idx)))
# print(list(zip(valid_labels, idx)))

CPU times: user 12 µs, sys: 0 ns, total: 12 µs
Wall time: 13.6 µs


In [20]:
%%time
# Find the accuracy
correct = 0
for i in range(len(out)):
    if out[i] == valid_labels[i]:
        correct += 1
accuracy = correct * 100 / len(out)
print('Accuracy(%): ' + str(accuracy))

Accuracy(%): 90.0
CPU times: user 68 µs, sys: 10 µs, total: 78 µs
Wall time: 56.5 µs


In [None]:
# Identify test images
test_1d = [t.flatten() for t in test_storage]
# Create data matrix
test_data_matrix = np.transpose(test_1d)
# Project test images into the same subspace as training images
proj_test_ortho = np.dot(np.transpose(eig_vectors_ortho), test_data_matrix)
proj_test_matrix = np.dot(eig_vectors, proj_test_ortho)

In [None]:
# Calculate L2 norms
tr1 = np.transpose(proj_test_matrix)
tr2 = np.transpose(proj_data_matrix)
l2 = []
for a in tr1:
    l1 = [np.linalg.norm(a - b) for b in tr2]
    l2.append(l1)

In [None]:
# Find the index of the training image that matches closely with the test image
idx = [l.index(min(l)) for l in l2]

In [None]:
# Find the label for the test images
out = []
for i in idx:
    out.append(train_labels[i])

In [None]:
# Find the accuracy
correct = 0
for i in range(len(out)):
    if out[i] == test_labels[i]:
        correct += 1
accuracy = correct * 100 / len(out)
print('Accuracy(%): ' + str(accuracy))

In [None]:
# Print images (true / predicted)
j = 0
for i in range(len(out)):
    dis1 = Image.fromarray(train_storage[idx[i]])
    display(dis1)
    print('Predicted:' + str(out[i]))
    dis2 = Image.fromarray(test_storage[j])
    display(dis2)
    print('True:' + str(test_labels[j]))
    print('-----------------------------------')
    j += 1
# print(list(zip(out, idx)))
# print(list(zip(test_labels, idx)))