# PCA (sklearn) vs. SVD

In [1]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
# Toy data set: 5 rows by 4 columns of integers, one row per line.
X = np.array([
    [12, 20, 30, 13],
    [15, 12, 17, 14],
    [0, 18, 73, 9],
    [13, 54, 67, 34],
    [34, 56, 78, 9],
])
# Show the matrix and its shape together, separated by a marker string.
X, '--------------', X.shape

(array([[12, 20, 30, 13],
        [15, 12, 17, 14],
        [ 0, 18, 73,  9],
        [13, 54, 67, 34],
        [34, 56, 78,  9]]), '--------------', (5, 4))

In [3]:
# Feature scaling (standardization) with sklearn.preprocessing.StandardScaler:
# each column of X is centred on its mean and divided by its standard deviation.
# fit_transform learns the per-column statistics and applies them in one call.
X_std = StandardScaler().fit_transform(X)
X_std

array([[-0.25585985, -0.63245553, -0.93185403, -0.30026054],
       [ 0.0182757 , -1.05409255, -1.45855414, -0.19302463],
       [-1.35240205, -0.73786479,  0.81030785, -0.72920418],
       [-0.16448133,  1.15950181,  0.5672155 ,  1.95169353],
       [ 1.75446753,  1.26491106,  1.01288482, -0.72920418]])

In [4]:
# PCA model from sklearn.decomposition.
# n_components is left at its default, so no dimensionality reduction is requested here.
pca_model = PCA()

# PCA projected data.

In [5]:
# Fit the PCA model on the standardized data and project that same data
# onto the fitted principal components in a single call.
pca_project_data = pca_model.fit_transform(X_std)

In [6]:
# Input for the SVD route: transpose the standardized data so that each row is a
# feature and each column is a sample (shape goes from (5, 4) to (4, 5)).
# np.cov in the next cell treats every row as one variable by default.
Sx_std = X_std.T  
# Show the transposed matrix and its shape, separated by a marker string.
Sx_std ,'-----' , Sx_std.shape

(array([[-0.25585985,  0.0182757 , -1.35240205, -0.16448133,  1.75446753],
        [-0.63245553, -1.05409255, -0.73786479,  1.15950181,  1.26491106],
        [-0.93185403, -1.45855414,  0.81030785,  0.5672155 ,  1.01288482],
        [-0.30026054, -0.19302463, -0.72920418,  1.95169353, -0.72920418]]),
 '-----',
 (4, 5))

In [7]:
# Feature-by-feature covariance matrix of the standardized data.
# rowvar=True (numpy's default, spelled out here) means each ROW of the input is
# one variable, which is why the transposed matrix Sx_std is passed in.
# np.cov normalizes by N-1 by default, so with 5 samples the diagonal is
# 5/4 = 1.25 rather than 1, even though StandardScaler gave every feature
# unit (population) variance.
cov = np.cov(Sx_std, rowvar=True)
cov

array([[ 1.25      ,  0.79224363,  0.19992077, -0.135227  ],
       [ 0.79224363,  1.25      ,  0.86695157,  0.56800878],
       [ 0.19992077,  0.86695157,  1.25      ,  0.08472175],
       [-0.135227  ,  0.56800878,  0.08472175,  1.25      ]])

In [8]:
# Singular value decomposition of the covariance matrix.
# np.linalg.svd returns (U, S, Vh): `u` holds the left singular vectors as columns,
# `sig` the singular values in descending order, and `v` is V transposed (not V).
# Because `cov` is symmetric, the columns of `u` are the principal directions.
u, sig , v = np.linalg.svd(cov)

# SVD projected data.

In [9]:

# Project the (features x samples) data onto the singular vectors of the
# covariance matrix: row i of the result holds the coordinates of every sample
# along the i-th column of `u`.
svd_project_data = u.T @ Sx_std
svd_project_data

array([[ 1.10453071,  1.51312394,  0.88581115, -1.54069928, -1.96276652],
       [ 0.12387819,  0.20883747, -0.1293467 , -1.74714643,  1.54377748],
       [ 0.41063674,  0.96557808, -1.66123786,  0.27572974,  0.0092933 ],
       [-0.14147851,  0.10288642,  0.02588684,  0.00614604,  0.00655921]])

In [10]:
# Display the sklearn PCA projection for comparison with svd_project_data.
pca_project_data

array([[-1.10453071, -0.12387819, -0.41063674,  0.14147851],
       [-1.51312394, -0.20883747, -0.96557808, -0.10288642],
       [-0.88581115,  0.1293467 ,  1.66123786, -0.02588684],
       [ 1.54069928,  1.74714643, -0.27572974, -0.00614604],
       [ 1.96276652, -1.54377748, -0.0092933 , -0.00655921]])

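`pca_project_data` and `svd_project_data` are transposes of each other, and their signs do not match. That is fine: the sign of a principal component is arbitrary, because a direction vector and its negation describe the same axis. (Think it through; if in doubt, search Stack Overflow, where this question has already been answered.)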

# PCA Reconstruct Data.

In [11]:
# Map the PCA projection back to the original (standardized) feature space.
pca_model.inverse_transform(pca_project_data)

array([[-0.25585985, -0.63245553, -0.93185403, -0.30026054],
       [ 0.0182757 , -1.05409255, -1.45855414, -0.19302463],
       [-1.35240205, -0.73786479,  0.81030785, -0.72920418],
       [-0.16448133,  1.15950181,  0.5672155 ,  1.95169353],
       [ 1.75446753,  1.26491106,  1.01288482, -0.72920418]])

# svd Reconstruct Data.

In [12]:
# Undo the projection: multiplying by `u` maps the projected coordinates back to
# the feature space, giving a (features x samples) matrix again.
reconstruct_data = u @ svd_project_data
reconstruct_data

array([[-0.25585985,  0.0182757 , -1.35240205, -0.16448133,  1.75446753],
       [-0.63245553, -1.05409255, -0.73786479,  1.15950181,  1.26491106],
       [-0.93185403, -1.45855414,  0.81030785,  0.5672155 ,  1.01288482],
       [-0.30026054, -0.19302463, -0.72920418,  1.95169353, -0.72920418]])

# Projecting data to lower dimensions using pca.

In [13]:
# Separate PCA model restricted to 2 components.
lower_dim_pca_model = PCA(n_components=2)  # Data will be projected onto the 2 directions of maximum variance

In [14]:
# Fit the 2-component model on the standardized data and project the data
# onto those 2 components in one call.
lower_dim_pca_project_data = lower_dim_pca_model.fit_transform(X_std) 
lower_dim_pca_project_data

array([[-1.10453071, -0.12387819],
       [-1.51312394, -0.20883747],
       [-0.88581115,  0.1293467 ],
       [ 1.54069928,  1.74714643],
       [ 1.96276652, -1.54377748]])

# Reconstruction of lower dimension projected data using pca.

In [15]:
# Map the 2-component projection back to the 4-feature space.
lower_dim_pca_model.inverse_transform(lower_dim_pca_project_data)

array([[-0.41683629, -0.7638431 , -0.56381249, -0.40030063],
       [-0.54871822, -1.04836812, -0.77204119, -0.58046719],
       [-0.46470812, -0.60112923, -0.45415009, -0.13352783],
       [-0.31633807,  1.14435097,  0.77279899,  1.84917488],
       [ 1.7466007 ,  1.26898946,  1.01720478, -0.73487923]])

The reconstruction from the lower-dimensional projection is not the same as the original data: the information along the discarded components is lost, and that loss is the reconstruction error.

# Projecting data to lower dimensions using svd.

In [16]:
# Keep only the first two columns of `u` (np.linalg.svd orders singular values
# from largest to smallest) and project the data onto them.
lower_dim_svd_project_data = u[:, :2].T @ Sx_std
lower_dim_svd_project_data

array([[ 1.10453071,  1.51312394,  0.88581115, -1.54069928, -1.96276652],
       [ 0.12387819,  0.20883747, -0.1293467 , -1.74714643,  1.54377748]])

# Reconstruction of lower dimensions projected data using svd.

In [17]:
# Map the 2-row projection back to the 4-feature space using the same first two
# columns of `u` that produced it; the result is (features x samples).
lower_dim_svd_reconstructed_data = u[:, :2] @ lower_dim_svd_project_data
lower_dim_svd_reconstructed_data

array([[-0.41683629, -0.54871822, -0.46470812, -0.31633807,  1.7466007 ],
       [-0.7638431 , -1.04836812, -0.60112923,  1.14435097,  1.26898946],
       [-0.56381249, -0.77204119, -0.45415009,  0.77279899,  1.01720478],
       [-0.40030063, -0.58046719, -0.13352783,  1.84917488, -0.73487923]])