# Principal Component Analysis

In [106]:
%matplotlib inline
import numpy as np
from sklearn.decomposition import PCA

In [181]:
def PCA_transform(X, n_components=1):
    """Project ``X`` onto its leading principal components.

    The data are mean-centered column by column and decomposed with
    ``np.linalg.svd``. The rows of the returned right-singular-vector matrix
    serve as the principal axes (NumPy returns them ordered by decreasing
    singular value), and the centered data are projected onto the first
    ``n_components`` of them.

    Parameters
    ----------
    X : array-like, 2-D
        One row per sample, one column per feature (the mean is taken over
        axis 0). Nested lists are accepted.
    n_components : int, default 1
        Number of leading principal axes to keep.

    Returns
    -------
    numpy.ndarray
        The centered data expressed in the retained axes, one row per sample
        and one column per retained component.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.

    Notes
    -----
    The sign of each singular vector is arbitrary, so individual columns may
    differ in sign from ``sklearn.decomposition.PCA.transform``.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D (n_samples, n_features); got %d dimension(s)" % X.ndim
        )

    ## Mean-center each feature (column)
    X_centered = X - np.mean(X, axis=0)

    ## PCA via SVD of the centered data; the covariance matrix is never
    ## needed. U is discarded, so skip the n_samples x n_samples U whenever
    ## the reduced form still yields the full n_features x n_features Vt
    ## (n_samples >= n_features). For wide data keep the full factorization
    ## so the number of available axes does not shrink.
    n_samples, n_features = X_centered.shape
    _, _, vt = np.linalg.svd(X_centered, full_matrices=n_samples < n_features)

    return np.dot(X_centered, vt[0:n_components, :].T)
   

## Test it: compare against scikit-learn's `PCA`

In [None]:
# 100 samples x 5 features, uniform on [0, 1). The seed is fixed so the
# comparison is reproducible on every run; a private RandomState is used so
# NumPy's global random state is left untouched.
data = np.random.RandomState(0).rand(100, 5)

In [184]:
# Reference result: fit scikit-learn's PCA (default settings) on `data`,
# then display the transformed samples.
pca = PCA().fit(data)
pca.transform(data)

array([[ -4.03962865e-01,  -1.20625735e-02,   6.13918628e-01,
          2.69836704e-01,  -5.38550106e-02],
       [  5.82105154e-01,  -1.84453786e-01,   1.41492207e-01,
         -5.86230052e-02,  -3.98072184e-01],
       [ -3.17957315e-01,  -1.67984469e-01,   9.72473143e-02,
         -2.03804746e-02,   1.84372501e-01],
       [  5.72010353e-02,   2.39424124e-01,  -5.18604472e-02,
          1.30122125e-01,   1.55159734e-01],
       [ -6.85364917e-02,   2.24711592e-01,  -4.17249783e-02,
         -5.69063245e-01,   1.80610202e-01],
       [ -9.73287779e-02,  -4.71029212e-01,  -4.43974299e-01,
         -1.42431998e-01,   2.43077087e-01],
       [ -8.03695450e-02,  -1.69205996e-01,  -3.49454041e-01,
          1.20393424e-01,  -2.35170585e-01],
       [  9.71980871e-02,   1.02185027e-01,   1.06612253e-01,
          5.09473316e-01,   1.72526537e-01],
       [ -1.54324060e-01,  -2.62785629e-01,  -2.76307772e-01,
          4.17480698e-02,  -3.86818362e-01],
       [ -4.32795414e-01,  -2.6561724

In [185]:
# Run the same data through the hand-written implementation, keeping all
# five components so the columns can be compared with scikit-learn's output
# (columns may differ in sign).
PCA_transform(X=data, n_components=5)

array([[ -4.03962865e-01,   1.20625735e-02,  -6.13918628e-01,
         -2.69836704e-01,  -5.38550106e-02],
       [  5.82105154e-01,   1.84453786e-01,  -1.41492207e-01,
          5.86230052e-02,  -3.98072184e-01],
       [ -3.17957315e-01,   1.67984469e-01,  -9.72473143e-02,
          2.03804746e-02,   1.84372501e-01],
       [  5.72010353e-02,  -2.39424124e-01,   5.18604472e-02,
         -1.30122125e-01,   1.55159734e-01],
       [ -6.85364917e-02,  -2.24711592e-01,   4.17249783e-02,
          5.69063245e-01,   1.80610202e-01],
       [ -9.73287779e-02,   4.71029212e-01,   4.43974299e-01,
          1.42431998e-01,   2.43077087e-01],
       [ -8.03695450e-02,   1.69205996e-01,   3.49454041e-01,
         -1.20393424e-01,  -2.35170585e-01],
       [  9.71980871e-02,  -1.02185027e-01,  -1.06612253e-01,
         -5.09473316e-01,   1.72526537e-01],
       [ -1.54324060e-01,   2.62785629e-01,   2.76307772e-01,
         -4.17480698e-02,  -3.86818362e-01],
       [ -4.32795414e-01,   2.6561724