## Special Topics in Information Systems
### Lab 2


# Face Recognition


#### by: Youssef Sherif ID: 72, and Ramez Maher ID: 27

### Problem Statement:

It is required to perform facial recognition -tell the subject's id given an image of him/her- on a database of 40 subjects. Each subject has 10 images in the database.

In [1]:
from skimage.io import imread
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics 
import numpy as np
import os

Some useful functions

In [2]:
def compute_mean(data):
    means = []
    trans_data = np.transpose(data)
    for i in range(len(trans_data)):
        means.append(np.mean(trans_data[i]))
    return np.array(means)

def center_data(data, means):
    transpose_means = np.transpose(means)
    Z = []
    for i in range(len(data)):
        Z.append(data[i] - transpose_means)
    return np.array(Z)

def compute_cov_matrix(Z):
    n = len(Z)
    Z_trans = np.transpose(Z)
    sigma = np.matmul(Z_trans, Z)/n
    return sigma

def choose_eigenvectors(eigenvalues, alpha):
    total_variance = np.sum(eigenvalues)
    variance_captured = 0
    fraction = 0
    index = len(eigenvalues)-1
    while fraction < alpha:
        variance_captured+= eigenvalues[index]
        index-= 1
        fraction = variance_captured/total_variance
    return index                                

def reduce_basis(eigenvectors, index):
    i = len(eigenvectors)-1
    u = []
    while i > index:
        u.append(eigenvectors[i])
        i-= 1
    return np.array(u)

def get_reduced_eigenvectors(data, alpha):
    means = compute_mean(data)
    Z = center_data(data, means)
    cov_matrix = compute_cov_matrix(Z)
    eigenVals, eigenVects = np.linalg.eigh(cov_matrix)
    index = choose_eigenvectors(eigenVals, alpha)
    Ur = reduce_basis(eigenVects, index)
    return Ur
    
def pca(Ur, data):
    Ur_trans = np.transpose(Ur)
    A = np.matmul(data, Ur_trans)
    return A
        
        



#### Generating data matrix and labels vector

In [3]:
DATAPATH = "data"

features_vec = []
labels = []
for i in range(1, 41):
    path = os.path.join(DATAPATH, "s"+str(i))
    for img in os.listdir(path):
        image = imread(os.path.join(path, img), as_gray=True)
        features_vec.append(image.flatten())
        labels.append(i)

data_matrix = np.array(features_vec)

#### Spliting dataset into train and test sets

In [4]:
x1_train = []
x1_test = []
y1_train = []
y1_test = []

#Primary train and test sets split 50/50
for i in range(0, 400):
    if i%2 == 1:
        x1_train.append(data_matrix[i])
        y1_train.append(labels[i])
    else:
        x1_test.append(data_matrix[i])
        y1_test.append(labels[i])
x1_train = np.array(x1_train)
x1_test = np.array(x1_test)

#### Dimensionality reduction using PCA and Classification

##### for alpha = 0.8

In [5]:
Ur1 = get_reduced_eigenvectors(x1_train, 0.8)
A1_train = pca(Ur1, x1_train)
A1_test = pca(Ur1, x1_test)

Training

In [6]:
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A1_train, y1_train)

KNeighborsClassifier(n_neighbors=1)

Test

In [7]:
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.40      0.57         5
           2       0.80      0.80      0.80         5
           3       0.75      0.60      0.67         5
           4       0.83      1.00      0.91         5
           5       0.56      1.00      0.71         5
           6       1.00      1.00      1.00         5
           7       0.83      1.00      0.91         5
           8       1.00      1.00      1.00         5
           9       0.71      1.00      0.83         5
          10       1.00      1.00      1.00         5
          11       1.00      0.80      0.89         5
          12       1.00      0.80      0.89         5
          13       0.80      0.80      0.80         5
          14       0.83      1.00      0.91         5
          15       1.00      1.00      1.00         5
          16       1.00      0.60      0.75         5
          17       0.83      1.00      0.91         5
          18       0.80    

When alpha is 0.8, the accuracy is 0.85 

##### for alpha = 0.85

In [8]:
Ur1 = get_reduced_eigenvectors(x1_train, 0.85)
A1_train = pca(Ur1, x1_train)
A1_test = pca(Ur1, x1_test)
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       0.75      0.60      0.67         5
           2       1.00      1.00      1.00         5
           3       1.00      0.80      0.89         5
           4       1.00      1.00      1.00         5
           5       0.62      1.00      0.77         5
           6       1.00      1.00      1.00         5
           7       0.83      1.00      0.91         5
           8       1.00      1.00      1.00         5
           9       0.67      0.80      0.73         5
          10       1.00      1.00      1.00         5
          11       1.00      1.00      1.00         5
          12       1.00      1.00      1.00         5
          13       0.83      1.00      0.91         5
          14       1.00      1.00      1.00         5
          15       1.00      1.00      1.00         5
          16       0.80      0.80      0.80         5
          17       1.00      1.00      1.00         5
          18       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


for alpha 0.85, the accuracy increased to 0.88.

##### for alpha = 0.9

In [9]:
Ur1 = get_reduced_eigenvectors(x1_train, 0.9)
A1_train = pca(Ur1, x1_train)
A1_test = pca(Ur1, x1_test)
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       0.83      1.00      0.91         5
           3       1.00      0.80      0.89         5
           4       1.00      1.00      1.00         5
           5       0.50      1.00      0.67         5
           6       1.00      1.00      1.00         5
           7       1.00      1.00      1.00         5
           8       1.00      1.00      1.00         5
           9       0.67      0.80      0.73         5
          10       1.00      1.00      1.00         5
          11       1.00      1.00      1.00         5
          12       1.00      1.00      1.00         5
          13       1.00      1.00      1.00         5
          14       1.00      1.00      1.00         5
          15       0.83      1.00      0.91         5
          16       1.00      0.60      0.75         5
          17       1.00      1.00      1.00         5
          18       0.80    

The accuracy increased again to 0.91, as we are increasing alpha(accepted fraction of total variance)

##### for alpha = 0.95

In [11]:
Ur1 = get_reduced_eigenvectors(x1_train, 0.95)
A1_train = pca(Ur1, x1_train)
A1_test = pca(Ur1, x1_test)
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       1.00      1.00      1.00         5
           3       0.83      1.00      0.91         5
           4       1.00      1.00      1.00         5
           5       0.56      1.00      0.71         5
           6       1.00      1.00      1.00         5
           7       1.00      1.00      1.00         5
           8       1.00      1.00      1.00         5
           9       1.00      0.80      0.89         5
          10       1.00      1.00      1.00         5
          11       1.00      1.00      1.00         5
          12       1.00      1.00      1.00         5
          13       1.00      1.00      1.00         5
          14       1.00      1.00      1.00         5
          15       0.83      1.00      0.91         5
          16       1.00      0.80      0.89         5
          17       1.00      1.00      1.00         5
          18       1.00    

Best accuracy yet 0.93, for alpha = 0.95.
Depending on these results we can conclude that, the more the value of accepted fraction of total variance, the better the accuracy will be, but this will also mean more features to consider, which could have negative effects.

#### Hyper-parameters tuning

Starting from this part, and given that using alpha = 0.95 yielded best accuracy, it will be used for all the next tests. 

In [29]:
Ur1 = get_reduced_eigenvectors(x1_train, 0.95)
A1_train = pca(Ur1, x1_train)
A1_test = pca(Ur1, x1_test)


##### for K = 3

In [31]:
classifier = KNeighborsClassifier(n_neighbors=3, weights='distance')
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       0.62      1.00      0.77         5
           3       1.00      0.80      0.89         5
           4       1.00      1.00      1.00         5
           5       0.56      1.00      0.71         5
           6       1.00      1.00      1.00         5
           7       0.83      1.00      0.91         5
           8       1.00      1.00      1.00         5
           9       0.80      0.80      0.80         5
          10       1.00      1.00      1.00         5
          11       1.00      1.00      1.00         5
          12       1.00      1.00      1.00         5
          13       1.00      1.00      1.00         5
          14       0.83      1.00      0.91         5
          15       0.67      0.40      0.50         5
          16       1.00      0.80      0.89         5
          17       1.00      1.00      1.00         5
          18       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


for k = 3 the accuracy gained is less than the accuracy yeilded at  k = 1

##### for k = 5

In [16]:
classifier = KNeighborsClassifier(n_neighbors=5, weights = "distance")
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       0.62      1.00      0.77         5
           3       1.00      0.80      0.89         5
           4       0.83      1.00      0.91         5
           5       0.62      1.00      0.77         5
           6       1.00      1.00      1.00         5
           7       0.71      1.00      0.83         5
           8       1.00      1.00      1.00         5
           9       0.83      1.00      0.91         5
          10       1.00      1.00      1.00         5
          11       1.00      1.00      1.00         5
          12       1.00      1.00      1.00         5
          13       1.00      0.80      0.89         5
          14       1.00      1.00      1.00         5
          15       1.00      0.40      0.57         5
          16       0.80      0.80      0.80         5
          17       1.00      0.80      0.89         5
          18       1.00    

accuracy = 0.86 which is less than the accuracy yeilded at k=1 and k=3

##### k=7

In [17]:
classifier = KNeighborsClassifier(n_neighbors=7, weights='distance')
classifier.fit(A1_train, y1_train)
y_predicted = classifier.predict(A1_test)
results = metrics.classification_report(y1_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       0.62      1.00      0.77         5
           3       1.00      0.80      0.89         5
           4       0.83      1.00      0.91         5
           5       0.40      0.80      0.53         5
           6       1.00      1.00      1.00         5
           7       0.83      1.00      0.91         5
           8       1.00      1.00      1.00         5
           9       0.80      0.80      0.80         5
          10       1.00      0.80      0.89         5
          11       1.00      1.00      1.00         5
          12       1.00      0.80      0.89         5
          13       1.00      0.80      0.89         5
          14       1.00      1.00      1.00         5
          15       1.00      0.40      0.57         5
          16       0.83      1.00      0.91         5
          17       1.00      0.60      0.75         5
          18       1.00    

accuracy = 0.82.
We can conclude that the more neighbors are, the less the accuracy will be.

#### Different train/test splitting

In [20]:
#70/30
x_train, x_test, y_train, y_test = train_test_split(data_matrix, labels, test_size=0.3)
Ur = get_reduced_eigenvectors(x_train, 0.95)
A_train = pca(Ur, x_train)
A_test = pca(Ur, x_test)
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A_train, y_train)
y_predicted = classifier.predict(A_test)
results = metrics.classification_report(y_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      0.60      0.75         5
           2       1.00      1.00      1.00         3
           3       1.00      0.75      0.86         4
           4       0.50      1.00      0.67         2
           5       0.75      1.00      0.86         3
           6       1.00      1.00      1.00         2
           7       1.00      1.00      1.00         4
           8       1.00      1.00      1.00         3
           9       1.00      1.00      1.00         3
          10       1.00      1.00      1.00         5
          11       1.00      0.75      0.86         4
          12       1.00      1.00      1.00         5
          13       1.00      1.00      1.00         3
          14       1.00      1.00      1.00         2
          15       0.60      1.00      0.75         3
          16       1.00      1.00      1.00         2
          17       1.00      1.00      1.00         5
          18       1.00    

In [27]:
#80/20
x_train, x_test, y_train, y_test = train_test_split(data_matrix, labels, test_size=0.2)
Ur = get_reduced_eigenvectors(x_train, 0.95)
A_train = pca(Ur, x_train)
A_test = pca(Ur, x_test)
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(A_train, y_train)
y_predicted = classifier.predict(A_test)
results = metrics.classification_report(y_test, y_predicted)
print(results)

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         3
           3       0.67      1.00      0.80         2
           4       1.00      0.50      0.67         2
           5       1.00      1.00      1.00         1
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         3
           9       0.33      1.00      0.50         1
          10       1.00      0.67      0.80         3
          11       1.00      1.00      1.00         2
          12       1.00      1.00      1.00         2
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         1
          17       1.00      1.00      1.00         3
          18       1.00      1.00      1.00         5
          19       1.00    

By trying different splits for the training and test sets, the best results were yeilded by spliting the data 80% for training and 20% for testing

--------------------------------------------------------------------------------------------------------------------------