In [333]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [334]:
# Test with PCA from scikit-learn
from sklearn.decomposition import PCA

### Implement PCA

In [335]:
def pca(input, target_d):
    """
    Project samples onto their leading principal components, using the
    eigendecomposition of the sample covariance matrix.

    input shape (n, m): n -> number of samples, m -> input dimension.
    The array passed in is left untouched.
    target_d: number of components to keep, must be lower than m.

    Returns Y with shape (n, target_d): the centered samples expressed in
    the basis of the target_d eigenvectors with the largest eigenvalues.
    The sign of each column is arbitrary, because an eigenvector is only
    defined up to a factor of -1.

    Raises AssertionError when target_d is not lower than m.
    """
    data = np.asarray(input)
    n, m = data.shape
    assert target_d < m, "Target dimension must be lower than original"

    # subtract the mean into a NEW array: an in-place `-=` would silently
    # center the caller's data and fails outright on integer arrays
    centered = data - data.mean(axis=0)

    # compute covariance matrix (m x m, symmetric)
    cov_mat = np.dot(centered.T, centered) / (n - 1)

    # eigh is the solver for symmetric matrices: eigenvalues are real and
    # the eigenvectors orthonormal, which the general eig does not promise
    values, vectors = np.linalg.eigh(cov_mat)

    # indices of the eigenvalues in descending order
    idx = values.argsort()[::-1]

    # take the target_d eigenvectors with the largest eigenvalues, shape (m, target_d)
    W = vectors[:, idx[:target_d]]

    # projection on lower dimension, shape (n, target_d)
    Y = np.dot(centered, W)

    return Y

### Implement PCA with SVD

In [336]:
def svd(input, target_d):
    """
    Project samples onto their leading principal components, using the
    singular value decomposition of the centered data.

    input shape (n, m): n -> number of samples, m -> input dimension.
    The array passed in is left untouched.
    target_d: number of components to keep, must be lower than m.

    Returns Y, the centered samples projected on the first target_d right
    singular vectors, with shape (n, target_d) (fewer columns if the
    decomposition yields fewer than target_d vectors). The sign of each
    column is arbitrary, because a singular vector is only defined up to a
    factor of -1.

    Raises AssertionError when target_d is not lower than m.
    """
    data = np.asarray(input)
    _, m = data.shape
    assert target_d < m, "Target dimension must be lower than original"

    # subtract the mean into a NEW array: an in-place `-=` would silently
    # center the caller's data and fails outright on integer arrays
    centered = data - data.mean(axis=0)

    # Scaling by 1 / sqrt(n - 1) would only rescale the singular values
    # (making their squares the covariance eigenvalues); the singular
    # vectors are unchanged, so the centered data is decomposed directly.
    #
    # Thin SVD: the left factor is not needed, so avoid building the full
    # (N x N) matrix. numpy returns the singular values already sorted in
    # descending order, and returns Vh -- the right singular vectors are
    # its ROWS, not its columns.
    _, _, Vh = np.linalg.svd(centered, full_matrices=False)

    # select the first target_d right singular vectors, shape (M x target_d)
    W = Vh[:target_d].T

    # project on lower dimension with shape (N x target_d)
    Y = np.dot(centered, W)

    return Y

### Compare results

In [337]:
# Toy problem: 3 samples in 2 dimensions, reduced to a single component.
n_sample = 3
d = 2
target_d = 1

# Unseeded draw from np.random, so the data (and every number printed
# below) changes on each run.
# NOTE(review): the name `input` shadows the Python built-in `input`.
input = np.random.rand(n_sample, d)

# Same reduction computed three ways: the two implementations defined in
# this notebook, and scikit-learn's PCA as the reference.
svd_x = svd(input, target_d)
pca_x = pca(input, target_d)
sklearn_pca = PCA(n_components=target_d).fit_transform(input)

# Print the projections for a visual comparison. A principal axis has no
# preferred direction, so results should be compared up to a global sign
# (in the recorded output the scikit-learn column is the negation of the
# other two).
print(f"svd {svd_x}\n")
print(f"pca {pca_x}\n")
print(f"Sklearn {sklearn_pca}\n")

svd [[-0.59778944]
 [ 0.09380137]
 [ 0.50398807]]

pca [[-0.59778944]
 [ 0.09380137]
 [ 0.50398807]]

Sklearn [[ 0.59778944]
 [-0.09380137]
 [-0.50398807]]

