# Face Recognition using KNN and PCA

In [23]:
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA

In [None]:
# The Olivetti faces dataset has 10 images for each of 40 people, each image being 64 x 64 pixels

In [6]:
# Load the Olivetti faces dataset through scikit-learn's fetch_olivetti_faces helper.
# NOTE(review): presumably downloads/caches the data on first use - confirm before running offline.
face_data = fetch_olivetti_faces()

In [11]:
# The image stack's shape unpacks into (number of images, pixel height, pixel width).
number_of_sample, height, width = face_data.images.shape
X = face_data.data      # feature matrix handed to the train/test split
y = face_data.target    # label for each image (used as the classification target)

# print(...) with one parenthesised argument produces the same output on
# Python 2 and Python 3, unlike the Python 2-only print statement.
print("Number of images " + str(number_of_sample))
print("Height of each image " + str(height))
print("Width of each image " + str(width))

Number of images 400
Height of each image 64
Width of each image 64


In [39]:
# Hold out 25% of the samples for evaluation; the fixed random_state keeps the
# split identical between runs.
#   X_train / y_train : inputs and labels used to fit the models
#   X_test  / y_test  : inputs and labels used to measure accuracy
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=40,
)

In [13]:
# Doing Principal Component Analysis

In [59]:
# Reduce the dimensionality of the dataset before classification.
n_components = 150                 # number of eigenvectors (principal components) to keep

# Fit the top n_components principal components on the training data only.
# random_state pins the randomized solver so that re-running the notebook yields
# the same projection, and therefore the same downstream accuracy figures.
# (Results recorded from earlier unseeded runs may differ slightly.)
# NOTE(review): RandomizedPCA was removed in scikit-learn 0.20; on newer releases
# the equivalent is PCA(n_components=..., svd_solver="randomized").
pca = RandomizedPCA(n_components=n_components, random_state=40).fit(X_train)



In [47]:
# Projecting the data into eigen space

In [60]:
# Project both splits onto the principal components fitted on the training set.
X_train_pca = pca.transform(X_train)     # training input data
X_test_pca = pca.transform(X_test)       # testing input data

# Shape of the projected training matrix. Despite the "height"/"width" names,
# these are the number of training samples and the number of retained components.
new_height, new_width = X_train_pca.shape

# print(...) with one parenthesised argument works on both Python 2 and Python 3.
print("New Height " + str(new_height))
print("New Width " + str(new_width))

New Height 300
New Width 150


In [24]:
# Now using K nearest neighbour to classify images


In [100]:
# Build a 5-nearest-neighbour classifier and fit it on the PCA-projected
# training data (no p is given, so the default distance setting is used).
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train_pca, y_train)

# Predict a label for every projected test image.
y_pred_test = knn_classifier.predict(X_test_pca)

In [101]:
# Accuracy = share of test samples whose predicted label equals the true label.
# Pair predictions with ground truth instead of indexing both by position.
correct_count = float(
    sum(1 for predicted, actual in zip(y_pred_test, y_test) if predicted == actual)
)
accuracy = correct_count / len(y_test)   # correct_count is a float, so this is true division

# Report as a percentage; print(...) form runs on both Python 2 and Python 3.
print(accuracy * 100)

78.0


In [65]:
# Next: k-nearest-neighbour classification using the Manhattan distance.
# The constructor call below is not assigned to anything; it is evaluated so the
# notebook displays the classifier's default parameter values.
KNeighborsClassifier()

# We are going to use the Minkowski metric with p = 1 (i.e. the Manhattan distance).

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [98]:
# Same 5-nearest-neighbour setup as before, but with p=1 so the Minkowski
# metric reduces to the Manhattan (L1) distance.
knn_classifier_manhathan = KNeighborsClassifier(n_neighbors=5, p=1).fit(
    X_train_pca, y_train
)

# Predict a label for every projected test image.
y_pred_test_manhathan = knn_classifier_manhathan.predict(X_test_pca)


In [82]:
# Now calculating the accuracy score for the Manhattan distance


In [99]:
# Accuracy of the Manhattan-distance classifier: share of test samples whose
# predicted label equals the true label.
correct_count_manhathan = float(
    sum(
        1
        for predicted, actual in zip(y_pred_test_manhathan, y_test)
        if predicted == actual
    )
)
# The count is a float, so this is true division on Python 2 as well.
accuracy_manhathan = correct_count_manhathan / len(y_test)

# Report as a percentage; print(...) form runs on both Python 2 and Python 3.
print(accuracy_manhathan * 100)

79.0


### Accuracy (%) for different numbers of nearest neighbours under both distance metrics

<table>
    <tr>
        <th>Number of nearest neighbours</th>
        <th>Euclidean Distance</th>
        <th>Manhattan Distance</th>
    </tr>
    <tr>
        <td>2</td>
        <td>86.0</td>
        <td>85.0</td>
    </tr>
    <tr>
        <td>5</td>
        <td>78.0</td>
        <td>79.0</td>
    </tr>
    <tr>
        <td>7</td>
        <td>70.0</td>
        <td>73.0</td>
    </tr>
    <tr>
        <td>10</td>
        <td>63.0</td>
        <td>67.0</td>
    </tr>
</table>