In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import pickle
from sklearn.decomposition import PCA

In [None]:
# Load the pickled image table. The context manager guarantees the file
# handle is closed once the object has been read.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('./data/data_images_100.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

In [None]:
# Display the loaded object for inspection. Later cells use it as a
# DataFrame with a 'name' column, all remaining columns being features.
data

In [None]:
# Feature matrix: every column except the 'name' label, as a NumPy array.
X = data.drop(columns='name').to_numpy()
X

In [None]:
# Column-wise average over all samples: one mean value per feature.
mean_face = np.mean(X, axis=0)
mean_face.shape

In [None]:
# Render the mean face as a 100x100 grayscale image.
plt.imshow(mean_face.reshape(100, 100), cmap="gray")
plt.axis("off")
# Must be called: a bare `plt.show` only echoes the function object as cell output.
plt.show()

In [None]:
# Centre every sample by removing the mean face (broadcast across rows).
X_t = np.subtract(X, mean_face)

In [None]:
# Fit a whitened PCA with no component limit so the explained-variance
# spectrum can be inspected in the following cells.
pca = PCA(
    n_components=None,
    whiten=True,
    svd_solver='auto',
)
pca.fit(X_t)

In [None]:
# Table of per-component explained variance ratio, its running total,
# and a 1-based component number.
exp_var_df = (
    pd.DataFrame({'explained_var': pca.explained_variance_ratio_})
    .assign(
        cume_explained_var=lambda frame: frame['explained_var'].cumsum(),
        principal_components=lambda frame: np.arange(1, len(frame) + 1),
    )
)
exp_var_df.head()

In [None]:
# Index the table by component number. Rebinding (instead of inplace=True)
# behind a column check keeps this cell safe to re-run: once the column has
# become the index, a second in-place set_index would raise KeyError.
if 'principal_components' in exp_var_df.columns:
    exp_var_df = exp_var_df.set_index('principal_components')

In [None]:
# Two stacked line plots for the first 100 components:
# per-component explained variance ratio (top) and its cumulative sum (bottom).
fig, ax = plt.subplots(nrows=2, figsize=(15, 12))

exp_var_df['explained_var'].head(100).plot(kind='line', marker='o', ax=ax[0])
ax[0].set(
    title='Explained variance ratio (first 100 components)',
    xlabel='Principal component',
    ylabel='Explained variance ratio',
)

exp_var_df['cume_explained_var'].head(100).plot(kind='line', marker='o', ax=ax[1])
ax[1].set(
    title='Cumulative explained variance ratio (first 100 components)',
    xlabel='Principal component',
    ylabel='Cumulative explained variance ratio',
)

# Explicit show so the cell output is the figure, not an Axes repr.
plt.show()

In [None]:
# Refit with only 20 whitened components and project the centred data onto them.
pca_20 = PCA(
    n_components=20,
    whiten=True,
    svd_solver='auto',
)
pca_data = pca_20.fit_transform(X_t)

In [None]:
# Persist the projected features together with their labels.
# Arrays passed positionally to np.savez are stored under the keys
# 'arr_0' (pca_data) and 'arr_1' (y).
y = data['name'].to_numpy()
np.savez('./data/data_pca_face', pca_data, y)

In [None]:
# Bundle the fitted 20-component model with the mean face used for centring.
pca_dict = dict(pca=pca_20, mean_face=mean_face)

In [None]:
# Write through a context manager so the handle is flushed and closed
# deterministically; the original left the file object open.
# NOTE(review): 'cpa_dict_face' looks like a typo for 'pca_dict_face'. The
# name is kept unchanged because other code may load this exact path.
with open('./model/cpa_dict_face.pickle', 'wb') as model_file:
    pickle.dump(pca_dict, model_file)

**Visualize reconstructed images**

In [None]:
# Map the 20-component projection back to the original feature space.
# NOTE(review): pca_20 was fitted on the mean-centred X_t, so this presumably
# approximates X_t rather than X (mean_face is not added back) — confirm.
pca_data_inv = pca_20.inverse_transform(pca_data)

In [None]:
# Sanity check: show the dimensions of the reconstructed array.
pca_data_inv.shape

In [None]:
# First reconstructed sample, reshaped from a flat vector to a 100x100 image.
eig_img = pca_data_inv[0].reshape(100, 100)
eig_img.shape

In [None]:
# Show the first reconstructed image in grayscale without axes.
plt.imshow(eig_img, cmap='gray')
plt.axis('off')
# Explicit show, as in the other plotting cells, so the cell output is the
# figure rather than the tuple returned by plt.axis.
plt.show()

In [None]:
def _show_face_grid(images, labels, indices, side=100, rows=4, cols=10):
    """Draw the selected rows of `images` as a grid of grayscale pictures.

    Parameters
    ----------
    images : 2-D array whose rows are flattened `side` x `side` images.
    labels : sequence of titles, indexed the same way as `images`.
    indices : row indices to draw, one per subplot (at most rows * cols).
    side : edge length in pixels used to reshape each row.
    rows, cols : layout of the subplot grid.
    """
    plt.figure(figsize=(15, 8))
    for slot, idx in enumerate(indices, start=1):
        plt.subplot(rows, cols, slot)
        plt.imshow(images[idx].reshape(side, side), cmap='gray')
        plt.title('{}'.format(labels[idx]))
        plt.xticks([])
        plt.yticks([])
    plt.show()


# Fixed seed so the same 40 samples are drawn on every run.
np.random.seed(1001)
# Sample from the actual number of rows instead of a hard-coded 942, which
# would raise IndexError on a smaller dataset and ignore rows of a larger one.
pics = np.random.randint(0, X.shape[0], 40)

# Original images first, then their PCA reconstructions for the same indices.
_show_face_grid(X, y, pics)
print("=" * 20 + 'Eigen Images' + "=" * 20)
_show_face_grid(pca_data_inv, y, pics)