# Performing PCA on the Olivetti faces dataset - http://nicolas-hug.com/blog/matrix_facto_1

In [16]:
from sklearn.datasets import olivetti_faces
import numpy as np

In [3]:
# Load the Olivetti faces through scikit-learn's documented, public loader.
# The previous call reached into the `sklearn.datasets.olivetti_faces`
# submodule, which is an implementation detail rather than public API (it is
# private in recent scikit-learn releases). The loader may download the data
# on first use.
from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces()

In [4]:
# Inspect the type of the object returned by the loader.
type(faces)

sklearn.datasets.base.Bunch

In [5]:
# Pull the 2-D data matrix out of the dataset container; every later cell
# works on this array.
data = faces.data

In [6]:
# Dimensions of the data matrix as (rows, columns).
data.shape

(400, 4096)

In [7]:
from sklearn.decomposition import PCA,TruncatedSVD

In [8]:
# Keep the 4 leading principal components. For an input of this size the
# default svd_solver="auto" can select the randomized solver, so the seed is
# pinned to make the fitted components reproducible across kernel restarts
# (random_state is simply ignored by the exact solver).
pca = PCA(n_components=4, random_state=0)

In [9]:
# Fit the PCA model on `data` and, in the same call, return `data` expressed
# in the basis of the fitted components.
data_transformed = pca.fit_transform(data)

In [10]:
# Same number of rows as `data`, one column per requested component.
data_transformed.shape

(400, 4)

### Center the data before performing SVD

In [27]:
# Factorise the column-centred data matrix as U @ diag(S) @ VT.
# NOTE: with full_matrices=True (NumPy's default, spelled out here) VT is a
# square matrix with one row per column of `data`; only its leading rows are
# comparable with `pca.components_`.
U, S, VT = np.linalg.svd(
    data - data.mean(axis=0),  # subtract the per-column mean (centering)
    full_matrices=True,
)

### You will see that the PCA components are equal to the first `n_components` rows of VT (the third factor returned by the SVD)

In [20]:
# Principal axes learned by the fitted PCA, one row per retained component.
pca.components_

array([[-0.0041911 , -0.0071095 , -0.00933609, ...,  0.00018516,
         0.00337966,  0.00318826],
       [-0.02859139, -0.03328836, -0.0378465 , ...,  0.02962783,
         0.02721299,  0.02488899],
       [ 0.0013569 , -0.00032581, -0.000198  , ..., -0.01541365,
        -0.01370978, -0.01188341],
       [ 0.00112445, -0.00179019, -0.01168208, ...,  0.02942981,
         0.02781905,  0.02521845]])

In [28]:
# Right singular vectors from the SVD, one per row; compare the leading rows
# with `pca.components_`.
VT

array([[-4.19110339e-03, -7.10950093e-03, -9.33609344e-03, ...,
         1.85158497e-04,  3.37965507e-03,  3.18825641e-03],
       [-2.85913907e-02, -3.32883634e-02, -3.78464982e-02, ...,
         2.96278279e-02,  2.72129867e-02,  2.48889923e-02],
       [ 1.35690800e-03, -3.25796980e-04, -1.97989022e-04, ...,
        -1.54136522e-02, -1.37097845e-02, -1.18834134e-02],
       ...,
       [-2.53726207e-02,  1.66105758e-02, -5.98885864e-03, ...,
         7.49510527e-01, -9.66182351e-02, -2.15839334e-02],
       [-1.77378319e-02,  1.03698634e-02,  5.80460113e-03, ...,
        -9.82896760e-02,  7.77752340e-01, -1.17350824e-01],
       [-4.25733114e-03,  5.38527220e-03, -5.34956460e-04, ...,
        -1.98797639e-02, -1.17948242e-01,  7.65613616e-01]], dtype=float32)

### Compute the transformed data by projecting the centered data onto the eigenvectors (the rows of VT) :)

In [31]:
# Project the mean-centred samples onto the right singular vectors: column j
# of the result holds each sample's coordinate along row j of VT.
data_transformed_svd = (data - data.mean(axis=0)).dot(VT.T)

### Validate that the SVD-transformed data and the PCA-transformed data are the same (compare the first 4 values of each row, since PCA kept only 4 components)

In [33]:
# First sample in the SVD basis: one coordinate per row of VT.
data_transformed_svd[0,:]

array([-6.4326129e+00, -7.0366877e-01, -1.4300222e+00, ...,
       -2.9569492e-08,  5.9604645e-08,  4.0978193e-08], dtype=float32)

In [34]:
# First sample as transformed by PCA: only the retained components appear.
data_transformed[0,:]

array([-6.4326122 , -0.70366922, -1.43002281,  1.27853534])

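### Display orthogonality: the dot product of two different singular vectors (or of two different principal components) is ≈ 0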

In [38]:
# Inner product of the first two left singular vectors; a value ≈ 0 means
# they are orthogonal. (A 1-D array is its own transpose, so none is taken.)
np.dot(U[:, 0], U[:, 1])

2.1645974e-10

In [45]:
# Inner product of the first two PCA components (rows of `components_`);
# a value ≈ 0 means they are orthogonal.
np.dot(pca.components_[0], pca.components_[1])

1.474514954580286e-17