In [1]:
import os
import numpy
import pandas as pd
from PIL import Image

In [2]:
data_matrix = numpy.ndarray(shape=(400, 10304), dtype=numpy.dtype('u1'))  # 8-bit unsigned int
label_vector = numpy.empty(shape=(400, 1), dtype=numpy.dtype('u1'))

i = 0
for dirname, _, filenames in os.walk('dataset'):
    for filename in filenames:
        image = Image.open(os.path.join(dirname, filename))
        numpy_image = numpy.asarray(image).flatten()
        data_matrix[i] = numpy_image.copy()
        label_vector[i] = int(dirname[dirname.rindex('\\') + 2:])
        i += 1

print(data_matrix)
print(data_matrix.shape)

[[ 48  49  45 ...  47  46  46]
 [ 34  34  33 ...  37  40  33]
 [ 60  60  62 ...  32  34  34]
 ...
 [112 109 116 ...  93  88  92]
 [111 114 112 ...  88  86  92]
 [110 112 113 ...  92  87  90]]
(400, 10304)


In [3]:
# Odd rows for training
train_data = data_matrix[::2, :]
y_train = label_vector[::2, :].ravel()

# Even rows for testing
test_data = data_matrix[1::2, :]
y_test = label_vector[1::2, :].ravel()

print(test_data)
print(test_data.shape)
print("\n")
print(train_data)
print(train_data.shape)

[[ 34  34  33 ...  37  40  33]
 [ 39  44  53 ...  29  26  29]
 [ 64  76  80 ...  35  37  39]
 ...
 [110 109 111 ...  94  88  90]
 [112 109 116 ...  93  88  92]
 [110 112 113 ...  92  87  90]]
(200, 10304)


[[ 48  49  45 ...  47  46  46]
 [ 60  60  62 ...  32  34  34]
 [ 63  53  35 ...  41  10  24]
 ...
 [114 110 112 ...  93  89  86]
 [113 112 111 ...  87  87  89]
 [111 114 112 ...  88  86  92]]
(200, 10304)


In [4]:
df = pd.DataFrame(train_data)
df['class'] = y_train

In [5]:
#classes --->classes matrix
#mean_mat ---> mean for each class matrix
#samples_c ---> nu of samples per class

classes = numpy.sort(df['class'].unique())
mean_mat = df.groupby('class').mean().values
samples_c = df.groupby('class').size().values

In [6]:
#between classes matrix
#nk (nu of samples for class)
#uk (mean of class)
#u  (mean of all classes)

u = mean_mat.mean(0)

B = numpy.zeros((10304, 10304))

for i in range(classes.size):
    nk = samples_c[i]
    uk = mean_mat[i]
    t = uk - u
    B = B*nk + numpy.matmul(t, t.T)

print(B.shape)

(10304, 10304)


In [7]:
#within class matrix
#class_ss --> class samples
#class_m --> class mean

S = numpy.zeros((10304, 10304))

for i in range(classes.size):
    class_ss = df.loc[df['class'] == i + 1].values[:, :-1]
    class_m = mean_mat[i]
    centered_data = numpy.subtract(class_ss, class_m)
    s = numpy.matmul(centered_data.T, centered_data)
    S = S + s

In [8]:
S_inv = numpy.linalg.inv(S)
S_inv.shape

(10304, 10304)

In [19]:
eigen_values, eigen_vectors = numpy.linalg.eigh(numpy.matmul(S_inv, B))

In [20]:
#sort the eigenvalues and eigenvectors in descending order
sorted_index = numpy.argsort(eigen_values)[::-1]
sorted_eigenvalue = eigen_values[sorted_index]
sorted_eigenvectors = eigen_vectors[:, sorted_index]
sorted_eigenvalue

array([ 1.95758711e+49,  6.70561170e+48,  4.23629485e+48, ...,
       -4.23610290e+48, -6.71033638e+48, -1.94879138e+49])

In [21]:
U = sorted_eigenvectors[:, 0:39]
U.shape

(10304, 39)

In [22]:
X_train = numpy.matmul(train_data, U)
X_test = numpy.matmul(test_data, U)

print(X_train.shape)
print(X_test.shape)

(200, 39)
(200, 39)


In [25]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(X_train, y_train)

# Predict on dataset which model has not seen before
y_pred = knn.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))




Accuracy: 0.93
