In [14]:
#importing libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import metrics

import os
# Show which files are present in the Olivetti input directory.
dataset_files = os.listdir("../input/olivetti")
print(dataset_files)

In [15]:
import warnings

# Install a catch-all "ignore" filter so no warning is displayed from here on.
warnings.simplefilter('ignore')
print("warning ignored")

In [16]:
# Load the face images and their target labels from the two .npy files.
data, target = (
    np.load(npy_path)
    for npy_path in (
        "../input/olivetti/olivetti_faces.npy",
        "../input/olivetti/olivetti_faces_target.npy",
    )
)

In [17]:
# Summarise the dataset: number of images, number of distinct targets,
# image size (axes 1 and 2 of `data`) and the target values themselves.
unique_targets = np.unique(target)  # computed once, used by two lines below
print("there are {} images in dataset".format(len(data)))
print("there are {} unique targets in the dataset".format(len(unique_targets)))
print("size of image {}*{}".format(data.shape[1], data.shape[2]))
print("unique target number:", unique_targets)

In [18]:
def show_40_distinct_people(images, unique_ids):
    """Draw one face per id on a 4x10 grid of subplots.

    For every ``person`` in ``unique_ids`` the image stored at index
    ``person * 10`` of ``images`` is shown on the subplot at flat position
    ``person``, with the axis ticks removed and a "face id" title.
    """
    # 4 rows * 10 columns of subplots on an 18*9 figure
    _, grid = plt.subplots(nrows=4, ncols=10, figsize=(18, 9))
    cells = grid.ravel()

    for person in unique_ids:
        cell = cells[person]
        cell.imshow(images[person * 10], cmap='gray')
        cell.set_xticks([])
        cell.set_yticks([])
        cell.set_title("face id:{}".format(person))
    plt.suptitle("There are 40 distinct people in the dataset")

In [19]:
# One face for every distinct target value found in the dataset.
show_40_distinct_people(images=data, unique_ids=np.unique(target))

In [20]:
def show_10_faces_of_n_subjects(images, subject_ids):
    """Plot ten faces for each requested subject, one subject per row.

    Parameters
    ----------
    images : indexable collection of images
        The faces of subject ``s`` are read from indices ``s * 10`` through
        ``s * 10 + 9``.
    subject_ids : sequence of int
        Subjects to display; row ``i`` of the grid shows ``subject_ids[i]``.
    """
    cols = 10
    rows = len(subject_ids)
    # squeeze=False keeps the axes array 2-D even when only one subject is
    # requested. With the default, plt.subplots returns a 1-D array for a
    # single row and the [row, col] indexing below raises IndexError.
    fig, axarr = plt.subplots(nrows=rows, ncols=cols, figsize=(18, 9), squeeze=False)
    for i, subject_id in enumerate(subject_ids):
        for j in range(cols):
            image_index = subject_id * 10 + j
            cell = axarr[i, j]
            cell.imshow(images[image_index], cmap="gray")
            cell.set_xticks([])
            cell.set_yticks([])
            cell.set_title("face id:{}".format(subject_id))
# Ten faces each for five hand-picked subject ids.
show_10_faces_of_n_subjects(data, [0, 5, 21, 24, 36])

In [21]:
# Flatten every image into a single row: one row per image, one column per pixel.
X = data.reshape(len(data), -1)
print('X shape', X.shape)

In [22]:
#split the data into training and testing subset
# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# Unpacking it as (X_test, X_train, ...) swaps the names, so the model would be
# trained on the 30% split and evaluated on the 70% split.
X_train, X_test, y_train, y_test = train_test_split(
    X, target, test_size=0.3, stratify=target, random_state=0
)
print('X_train shape',X_train.shape)
print('y_train shape{}'.format(y_train.shape))


In [23]:
# Bar chart of how many training samples each subject id has.
y_frame = pd.DataFrame({'subject ids': y_train})
samples_per_subject = y_frame.groupby(['subject ids']).size()
samples_per_subject.plot.bar(figsize=(15,8), title="Number of Samples for Each Classes")

In [24]:
!pip install mglearn

In [25]:
import mglearn

In [27]:
# Call mglearn's plot_pca_illustration helper (presumably draws an explanatory
# PCA figure — confirm against the mglearn documentation).
mglearn.plots.plot_pca_illustration()

In [28]:
#pca projection for defined target number
# PCA is already imported in the first cell, so it is not re-imported here.
# Two components are kept so that every image becomes a single 2-D point.
# random_state pins the result in case scikit-learn's automatic solver choice
# selects the randomized SVD for this data size.
pca = PCA(n_components=2, random_state=0)
pca.fit(X)
X_pca = pca.transform(X)

In [29]:
# Scatter the first two PCA coordinates of the first `number_of_people * 10`
# images, coloured by target id.
number_of_people = 10
# Assumes ten consecutive images per person, ordered by target id — TODO confirm.
index_range = number_of_people * 10

fig, ax = plt.subplots(figsize=(10, 8))
scatter = ax.scatter(
    X_pca[:index_range, 0],
    X_pca[:index_range, 1],
    c=target[:index_range],
    s=10,
    # one discrete colour per person
    cmap=plt.get_cmap('jet', number_of_people),
)

# "Principal" (not "Principle") is the correct term for PCA axes.
ax.set_xlabel("First Principal Component")
ax.set_ylabel("Second Principal Component")
ax.set_title("PCA projection of {} people".format(number_of_people))

fig.colorbar(scatter);

In [30]:
# Fit PCA with all components kept and plot the variance explained by each one.
pca = PCA()
pca.fit(X)

# A fresh figure/axes pair is used instead of the global figure number 1, so
# the curve can never be drawn onto a figure left over from another cell.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(pca.explained_variance_, linewidth=2)

ax.set_xlabel('Components')
ax.set_ylabel('Explained Variances')
plt.show()

In [31]:
# Fit a whitened PCA with 90 components on the training split only.
n_components = 90
# random_state makes the fit reproducible if scikit-learn's automatic solver
# selection picks the randomized SVD for this data size.
pca = PCA(n_components=n_components, whiten=True, random_state=0)
pca.fit(X_train)

In [32]:
# Show the per-pixel mean stored by the fitted PCA, reshaped back to an image.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
# Take the image size from `data` instead of hard-coding 64x64, matching how
# the eigenfaces are reshaped.
ax.imshow(pca.mean_.reshape((data.shape[1], data.shape[2])), cmap="gray")
ax.set_xticks([])
ax.set_yticks([])
ax.set_title('Average Face');

In [33]:
# Reshape every principal component back to image size and show them on a grid.
number_of_eigenfaces = len(pca.components_)
eigen_faces = pca.components_.reshape((number_of_eigenfaces, data.shape[1], data.shape[2]))

cols = 10
# Ceiling division: truncating would allocate too few rows (or zero rows)
# whenever the number of components is not a multiple of `cols`, and the loop
# below would then index past the end of the axes array.
rows = -(-number_of_eigenfaces // cols)
# squeeze=False guarantees an array of axes even for a single-row grid.
fig, axarr = plt.subplots(nrows=rows, ncols=cols, figsize=(15, 15), squeeze=False)
axarr = axarr.flatten()
for i in range(number_of_eigenfaces):
    axarr[i].imshow(eigen_faces[i], cmap="gray")
    axarr[i].set_xticks([])
    axarr[i].set_yticks([])
    axarr[i].set_title("eigen id:{}".format(i))
# Hide any unused cells left over in the last row.
for spare_axis in axarr[number_of_eigenfaces:]:
    spare_axis.axis('off')
# The original .format(...) call had no placeholders to fill, so the title was
# always this literal string.
plt.suptitle("All Eigen Faces");

In [34]:
# Project both splits onto the fitted PCA components.
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

# Fit a default SVC on the projected training data and score it on the test data.
clf = SVC().fit(X_train_pca, y_train)
y_pred = clf.predict(X_test_pca)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("accuracy score:{:.2f}".format(accuracy))