In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the COIL-100 input directory.
for dirname, _, filenames in os.walk('/kaggle/input/coil100'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Generate Data Matrix

In [None]:
# Number of subjects to load: construct_data_frame reads folders s1 .. s<number_of_persons>.
number_of_persons = 40

In [None]:
def read_single_image(image_path):
    """Read one 92x112, 8-bit binary PGM (P5) image as a flat list of grey levels.

    Parameters
    ----------
    image_path : str
        Path of the ``.pgm`` file to read.

    Returns
    -------
    list of int
        The 10304 (92 * 112) pixel values in file order, each in 0..255.

    Raises
    ------
    ValueError
        If the three header lines are not exactly ``P5``, ``92 112`` and
        ``255``, or if the file holds fewer pixel bytes than expected.
    """
    expected_header = (b'P5\n', b'92 112\n', b'255\n')
    pixel_count = 92 * 112  # one byte per pixel -> 10304 bytes

    with open(image_path, 'rb') as f:
        # Explicit checks instead of `assert`, so validation survives `python -O`.
        for expected_line in expected_header:
            header_line = f.readline()
            if header_line != expected_line:
                raise ValueError(
                    f'{image_path}: unexpected PGM header line {header_line!r}, '
                    f'expected {expected_line!r}'
                )
        # One bulk read instead of 10304 single-byte reads.
        pixels = f.read(pixel_count)

    if len(pixels) != pixel_count:
        raise ValueError(
            f'{image_path}: expected {pixel_count} pixel bytes, found {len(pixels)}'
        )
    # Iterating over bytes yields ints, same values as ord() on each single byte.
    return list(pixels)
    
def construct_data_frame():
    """Load the face images into a data matrix with one row per image.

    Reads ``s<x>/<y>.pgm`` for x in 1..number_of_persons and y in 1..10 below
    the dataset root, using ``read_single_image`` for each file, and prints
    progress messages plus the resulting sizes.

    Returns
    -------
    images : numpy.ndarray
        Array built from the per-image pixel lists, in reading order.
    persons : list of str
        ``str(x)`` of the subject each row belongs to, in the same order.
    """
    root = '/kaggle/input/att-database-of-faces/'

    print('Reading Started')
    persons = []
    images = []
    for subject in range(1, number_of_persons + 1):
        subject_dir = f'{root}s{subject}/'
        for shot in range(1, 11):
            persons.append(str(subject))
            images.append(read_single_image(f'{subject_dir}{shot}.pgm'))
    print('Reading Finished')

    # Sanity output: number of images, pixels per image, number of labels.
    print(len(images))
    print(len(images[0]))
    print(len(persons))

    return np.array(images), persons
            
# D: data matrix with one image per row; labels: subject id string for each row.
D, labels = construct_data_frame()

# Train-Test Split

In [None]:
def custom_train_test_split(data, labels, samples_no, train_indices, samples_per_class=10):
    """Split samples into train/test by their position within each class group.

    Sample ``i`` goes to the training set when ``i % samples_per_class`` is in
    ``train_indices``; otherwise it goes to the test set.

    Parameters
    ----------
    data : sequence
        Samples, indexable by integer position.
    labels : sequence
        Label of each sample, aligned with ``data``.
    samples_no : int
        Number of leading samples of ``data`` to distribute.
    train_indices : iterable of int
        Within-group positions that belong to the training set.
    samples_per_class : int, optional
        Size of each consecutive group of samples. Defaults to 10, the value
        that used to be hard-coded here.

    Returns
    -------
    tuple of four lists
        ``(X_train, y_train, X_test, y_test)``.
    """
    # Set membership keeps the per-sample test cheap regardless of list length.
    train_positions = set(train_indices)

    X_train, y_train = [], []
    X_test, y_test = [], []
    for i in range(samples_no):
        if i % samples_per_class in train_positions:
            X_train.append(data[i])
            y_train.append(labels[i])
        else:
            X_test.append(data[i])
            y_test.append(labels[i])
    return X_train, y_train, X_test, y_test
# Positions 0, 2, 4, 6, 8 within each run of ten images go to training; the rest to test.
X_train, y_train, X_test, y_test = custom_train_test_split(
    data=D,
    labels=labels,
    samples_no=len(D),
    train_indices=[0, 2, 4, 6, 8],
)
# Index the training rows by label so that one subject's rows can be selected with .loc.
train_data = pd.DataFrame(data=X_train, index=y_train)

In [None]:
def classify(X_train, y_train, X_test, y_test, n_neighbors):
    """Fit a k-nearest-neighbours classifier and report its test accuracy.

    Parameters
    ----------
    X_train, y_train
        Training samples and their labels, passed to ``KNeighborsClassifier.fit``.
    X_test, y_test
        Test samples and their true labels; must have the same length.
    n_neighbors : int
        Number of neighbours used by the classifier.

    Returns
    -------
    float
        Fraction of test samples whose predicted label equals the true label.
        The percentage is also printed.

    Raises
    ------
    ValueError
        If ``X_test`` is empty or its length differs from ``y_test``.
    """
    test_samples = len(X_test)
    if test_samples == 0:
        raise ValueError('X_test is empty; accuracy is undefined')
    if len(y_test) != test_samples:
        raise ValueError(
            f'X_test has {test_samples} samples but y_test has {len(y_test)} labels'
        )

    simple_classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    simple_classifier.fit(X_train, y_train)

    # One batched predict call instead of one call per test sample.
    predictions = simple_classifier.predict(X_test)
    correct = int(np.sum(predictions == np.asarray(y_test)))

    # int / int keeps `acc` a plain Python float, as before.
    acc = correct / test_samples
    print(f'Acc at K = {n_neighbors}: {acc*100} %')
    return acc

# PCA

**Calculating mean**

In [None]:
# Per-pixel (column-wise) mean of the training images.
# The axis is stated explicitly: np.mean(DataFrame) forwards axis=None to pandas,
# and what that means depends on the pandas version (on recent releases it
# reduces over the whole frame to a single scalar).
mean = train_data.mean(axis=0)

**Centralize data**

In [None]:
# Subtract the mean from every training row (the Series is matched against the columns).
z = train_data.sub(mean, axis='columns')

**Covariance Matrix**

In [None]:
# Covariance of the centred training data: (Z^T Z) / n.
# The product is computed on a plain ndarray so that it does not depend on how
# pandas dispatches np.matmul for frames whose row index holds repeated labels.
z_values = np.asarray(z)
cov = (z_values.T @ z_values) / len(train_data)

**Eigenvalues and eigenvectors**

In [None]:
# Eigen-decomposition of the covariance matrix, reordered so that the
# largest eigenvalue (and its eigenvector column) comes first.
eig_values, eig_vectors = np.linalg.eigh(cov)
idx = np.argsort(eig_values)[::-1]
eig_values, eig_vectors = eig_values[idx], eig_vectors[:, idx]

**Choosing dimensionality**

In [None]:
def dimensionality(alpha, values=None):
    """Return how many leading eigenvalues are needed to reach a variance fraction.

    Parameters
    ----------
    alpha : float
        Target fraction of the eigenvalue total.
    values : sequence of float, optional
        Eigenvalues in the order they should be accumulated. Defaults to the
        module-level ``eig_values``, which is what this function always used.

    Returns
    -------
    int
        Smallest count ``k`` such that the first ``k`` values sum to at least
        ``alpha`` times the total; ``len(values)`` if that is never reached.
    """
    if values is None:
        values = eig_values

    total = np.sum(values)  # named `total` so the builtin `sum` is not shadowed
    running = 0
    for count, value in enumerate(values, start=1):
        running = running + value
        if running / total >= alpha:
            return count
    return len(values)

In [None]:
# Number of principal components needed for each target variance fraction.
r1, r2, r3, r4 = (dimensionality(alpha) for alpha in (0.8, 0.85, 0.9, 0.95))

r = [r1, r2, r3, r4]
r

**KNN and KNN tuning for different alpha values**

In [None]:
k_values = [1, 3, 5, 7]

# Centring does not depend on the number of components, so do it once.
# Plain ndarrays keep the projections below independent of pandas' handling of np.matmul.
centered_train = np.asarray(z)
centered_test = np.asarray(X_test) - np.asarray(mean)

for i in r:
    # Rows of U are the first i eigenvectors (columns of eig_vectors).
    U = eig_vectors[:, 0:i].T
    projected_train_data = centered_train @ U.T
    projected_test_data = centered_test @ U.T

    # Accuracy for each neighbour count at this dimensionality.
    acc = [
        classify(projected_train_data, y_train, projected_test_data, y_test, k)
        for k in k_values
    ]

    plt.plot(k_values, acc)
    plt.xlabel('Number of neighbors')
    plt.ylabel('Acc')
    plt.title('K tunning graph')
    plt.show()

# LDA

### Calculating mean vector for every class

In [None]:
# Number of features per image, taken from the training frame itself.
original_dimensions = train_data.shape[1]
label_no = number_of_persons

# Per-class mean vector, keyed by class label.
mean_vector = dict()
# Iterate over the distinct labels actually present in the training frame:
# each class is computed once, and labels without training rows cannot raise KeyError.
for label in train_data.index.unique():
    # A list selector always yields a DataFrame, even for a single-row class.
    person_data = train_data.loc[[label]]
    # Explicit column-wise mean; stored directly so the PCA variable `mean` is not overwritten.
    mean_vector[label] = person_data.mean(axis=0)

### Calculating Sb and S matrices

In [None]:
# Overall per-pixel mean of the training data (explicit column-wise reduction).
total_mean = train_data.mean(axis=0)

# S accumulates the within-class scatter, SB the between-class scatter.
S = np.zeros((original_dimensions, original_dimensions))
SB = np.zeros((original_dimensions, original_dimensions))

for person in range(1, label_no + 1):
    label = str(person)  # class labels are stored as strings
    print("Calculating label " + label)

    # A list selector always yields a DataFrame, even for a single-row class.
    person_data = train_data.loc[[label]]

    # Within-class scatter: rows centred on their own class mean.
    Z = np.array(person_data - mean_vector[label])
    S += Z.T.dot(Z)

    # Between-class scatter: nk * (mu_k - mu)(mu_k - mu)^T, with the mean
    # difference reshaped to a column vector so the product is a full matrix.
    nk = person_data.shape[0]
    mean_diff = np.array(mean_vector[label] - total_mean).reshape(original_dimensions, 1)
    SB += nk * (mean_diff).dot(mean_diff.T)

print(S)
print(SB)

### Calculating projection matrix
Replace $S^{-1} B$ by $S^{-1} S_b$ <br>
Then $(S^{-1} S_b)\, w = \lambda w$ <br>
Then $A w = \lambda w$, where $A = S^{-1} S_b$

In [None]:
# S is a sum of per-class Z^T Z terms, each built from only a handful of rows,
# so its rank is far below its dimension and a plain inverse is unreliable.
# The pseudo-inverse is used instead.
A = np.linalg.pinv(S).dot(SB)

# A = S^+ SB is not symmetric in general, so the symmetric-only solver `eigh`
# does not apply; use the general solver. This is slower than eigh.
eig_values, eig_vectors = np.linalg.eig(A)

# The general solver returns complex arrays; keep the real parts
# (imaginary parts here are expected to be numerical noise only).
eig_values = eig_values.real
eig_vectors = eig_vectors.real

# sort eigenvalues descendingly
idx = eig_values.argsort()[::-1]
eig_values = eig_values[idx]
print(eig_values)
eig_vectors = eig_vectors[:, idx]

# Keep the leading (number of classes - 1) eigenvectors as rows of U;
# LDA provides at most that many discriminant directions.
U = eig_vectors[:, 0:number_of_persons - 1].T
print(U)

In [None]:
# Project both splits onto the LDA directions (rows of U).
projected_train_data = np.array(X_train) @ U.T
projected_test_data = np.array(X_test) @ U.T

# Accuracy for each neighbour count.
k_values = [1, 3, 5, 7]
acc = [
    classify(projected_train_data, y_train, projected_test_data, y_test, k)
    for k in k_values
]

plt.plot(k_values, acc)
plt.title('K tunning graph')
plt.xlabel('Number of neighbors')
plt.ylabel('Acc')
plt.show()

# Generate Non-face Data

In [None]:
import random
import os
from PIL import Image

In [None]:
# Create the output directory; exist_ok avoids the separate exists() check.
os.makedirs('/kaggle/working/coil100', exist_ok=True)

# Number of COIL-100 objects used as non-face examples.
n_nonface = 20

# A dedicated, seeded generator makes the selection identical on every
# Restart & Run All without touching the global `random` state.
nonface_rng = random.Random(0)

# 10 distinct rotation angles out of 0, 5, ..., 355 and n_nonface distinct object ids out of 1..100.
rotations = nonface_rng.sample(range(0, 360, 5), 10)
objects = nonface_rng.sample(range(1, 101), n_nonface)

In [None]:
# For each selected object and each selected rotation angle:
#   - read obj<object id>__<angle>.png from the COIL-100 input folder,
#   - resize it to 92x112 and convert it to 8-bit greyscale,
#   - save it as obj<position of the object>/<position of the angle>.pgm,
#     creating the object's folder when it does not exist yet.

in_path = '/kaggle/input/coil100/coil-100/coil-100/'
out_path = '/kaggle/working/coil100/'

for (obj_idx, obj) in enumerate(objects):
    dirname = 'obj' + str(obj_idx + 1)
    # One directory per object, created once instead of being checked for every image.
    os.makedirs(out_path + dirname, exist_ok=True)

    for (angle_idx, angle) in enumerate(rotations):
        filename = in_path + 'obj{}__{}.png'.format(obj, angle)
        # The context manager closes the source file as soon as the converted copy exists.
        with Image.open(filename) as source_image:
            im = source_image.resize((92, 112)).convert("L")
        # The .pgm extension selects the output format.
        im.save(out_path + dirname + '/{}.pgm'.format(angle_idx + 1))
        
#with open('/kaggle/working/coil100/obj1_1.pgm', 'w') as f:
#    f.write('hello')

In [None]:
def read_nonface_data():
    """Load the converted non-face images into a data matrix.

    Reads ``obj<x>/<y>.pgm`` for x in 1..n_nonface and y in 1..10 below the
    working COIL-100 folder, using ``read_single_image`` for each file, and
    prints progress messages plus the resulting sizes.

    Returns
    -------
    images : numpy.ndarray
        Array built from the per-image pixel lists, in reading order.
    labels : list of str
        The string ``'object'`` repeated ``n_nonface * 10`` times.
    """
    root = '/kaggle/working/coil100/'

    print('Reading Started')
    images = [
        read_single_image(f'{root}obj{obj}/{shot}.pgm')
        for obj in range(1, n_nonface + 1)
        for shot in range(1, 11)
    ]
    print('Reading Finished')

    # Sanity output: number of images and pixels per image.
    print(len(images))
    print(len(images[0]))

    image_matrix = np.array(images)
    object_labels = ['object'] * n_nonface * 10
    return image_matrix, object_labels

temp_D, temp_label = read_nonface_data()

# Row ids for the non-face objects continue after the face subjects:
# number_of_persons + 1 .. number_of_persons + n_nonface, each repeated for its 10 images.
indices = [
    i
    for i in range(number_of_persons + 1, number_of_persons + n_nonface + 1)
    for _ in range(10)
]
print(indices)
temp_D = pd.DataFrame(temp_D, index=indices)
print(temp_D)

# The face data occupies the first number_of_persons * 10 rows of D / entries of labels.
# Always starting again from that prefix makes this cell safe to re-run.
n_face_rows = number_of_persons * 10
face_labels = labels[:n_face_rows]
if isinstance(D, pd.DataFrame):
    face_D = D.iloc[:n_face_rows]
else:
    # pd.concat only accepts pandas objects, and D comes out of construct_data_frame
    # as an ndarray. Face rows are indexed by integer subject id so that the
    # combined index is one consistent id sequence.
    face_D = pd.DataFrame(D[:n_face_rows], index=[int(label) for label in face_labels])

D = pd.concat([face_D, temp_D], axis=0)
print(D)
labels = face_labels + temp_label
print(labels)