In [1]:
"""
PCA computation via SVD.
Compares the principal components obtained via SVD
with those obtained directly from the
eigendecomposition of the covariance matrix.
Demonstrates that they are equal (up to sign).
"""
import numpy as np
# Record the NumPy version this notebook was executed with,
# so that the stored outputs can be tied to a library version.
print(np.__version__)

1.16.5


In [2]:
# Execute the shared helper notebook in this kernel's namespace.
# NOTE(review): the `pca` function called in the next cell is not
# defined in this notebook, so it is presumably provided here - confirm.
%run 4.3.2-common.ipynb

In [3]:
# Number of data points (rows of the data matrix)
N = 100

# Draw the data from a locally seeded generator so that
# Restart & Run All reproduces the same numbers, without
# touching NumPy's global random state.
rng = np.random.RandomState(0)

# We create a random feature vector
x_0 = rng.normal(0, 100, N)

# Then we create another feature vector which is
# highly correlated with the previous feature:
# x_1 = 2 * x_0.
# We add random noise to this second feature.
x_1 = 2 * x_0 + rng.normal(0, 20, N)

# Create the data matrix with x_0, x_1 as columns
X = np.column_stack((x_0, x_1))

# Perform PCA (directly as eigen of covariance
# matrix). `pca` is not defined in this cell; it must
# already be in the kernel namespace.
principal_values, principal_components = pca(X)

# Column order that sorts the principal values in
# decreasing order, to line up with the SVD below
# (np.linalg.svd returns singular values in
# descending order).
sorted_indices = np.argsort(-principal_values)
print("Principal components obtained via PCA:\n{}"
      .format(principal_components[:, sorted_indices]))

# Perform PCA via SVD following Mean subtraction
X_mean = X - np.mean(X, axis=0)

# Reduced SVD: with full_matrices=False, U is
# N x 2 instead of N x N. V_t is the same either way
# because X has more rows than columns, and only V_t
# is needed here.
U, S, V_t = np.linalg.svd(X_mean, full_matrices=False)
V = V_t.T

# Columns of V are the principal components
print("Principal components obtained via SVD:\n{}"
      .format(V))


# Assert that principal components are the same
for i in range(V.shape[1]):
    svd_component = V[:, i]
    eig_component = principal_components[:, sorted_indices[i]]
    # A principal direction is only defined up to sign,
    # so -V[:, i] is accepted as well.
    same_direction = np.allclose(svd_component, eig_component)
    opposite_direction = np.allclose(-svd_component, eig_component)
    assert same_direction or opposite_direction, \
        "Principal component {} differs between SVD and PCA".format(i)

Principal components obtained via PCA:
[[-0.44419    -0.89593261]
 [-0.89593261  0.44419   ]]
Principal components obtained via SVD:
[[-0.44419    -0.89593261]
 [-0.89593261  0.44419   ]]
