In [5]:
# Principal Component Analysis (PCA) in NumPy
import numpy as np

def PCA(X, k):
    """Project ``X`` onto its top ``k`` principal components.

    The data is standardized per column, the covariance matrix of the
    standardized data is eigen-decomposed, and the data is projected onto
    the ``k`` eigenvectors with the largest eigenvalues.

    Parameters
    ----------
    X : array_like
        Data with one sample per row and one feature per column.
    k : int
        Number of leading components to keep.

    Returns
    -------
    components : ndarray
        The selected eigenvectors, one per row (``eigenvectors.T``).
    eigenvalues : ndarray
        The ``k`` largest eigenvalues of the covariance matrix, in
        descending order.
    x_pca : ndarray
        The standardized data multiplied by the selected eigenvectors.

    Notes
    -----
    The sign of each eigenvector is arbitrary, so a component (and the
    matching column of ``x_pca``) may be negated relative to another PCA
    implementation.
    """
    X = np.asarray(X)

    # Standardize the data; axis=0 means features are the columns.
    # np.std defaults to the population formula (ddof=0), whereas np.cov
    # below normalizes by N - 1.
    feature_std = np.std(X, axis=0)
    # A constant feature has zero spread; dividing by 1 instead of 0 leaves
    # it as an all-zero column rather than filling the result with NaN.
    safe_std = np.where(feature_std == 0, 1.0, feature_std)
    X_standard = (X - np.mean(X, axis=0)) / safe_std

    # Compute the covariance matrix. np.cov collapses the single-feature
    # case to a 0-d array, so force it back to a square 2-D matrix.
    cov_matrix = np.atleast_2d(np.cov(X_standard, rowvar=False))

    # The covariance matrix is symmetric, so use eigh: it always returns
    # real eigenvalues/eigenvectors, unlike the general-purpose eig which
    # can yield complex pairs from floating-point round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Reorder eigenpairs by descending eigenvalue
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # Keep the top k eigenvectors (the principal components)
    eigenvectors = eigenvectors[:, :k]

    # Transform the data by projecting the standardized data onto them
    x_pca = np.matmul(X_standard, eigenvectors)

    # Return our eigenvectors, eigenvalues and transformed data
    return eigenvectors.T, eigenvalues[:k], x_pca

In [6]:
# Toy dataset: 5 rows by 4 columns (features are the columns)
data = np.array([
    [2, 6, 3, 1],
    [3, 2, 1, 7],
    [3, 4, 3, 2],
    [4, 2, 1, 5],
    [1, 2, 8, 5],
])
# Last expression in the cell, so the returned tuple is displayed
PCA(data, 2)

(array([[ 0.51936987, -0.50375745, -0.43631032,  0.53489875],
        [-0.46975445, -0.5022783 ,  0.57076007,  0.44864263]]),
 array([2.65096625, 2.12315398]),
 array([[-1.88551614, -1.26150583],
        [ 1.68874322,  0.31652747],
        [-0.50238851, -0.88951369],
        [ 1.70973376, -0.55365747],
        [-1.01057233,  2.38814952]]))

In [10]:
# PCA using sklearn
# Imported under an alias so the name `PCA` keeps referring to the NumPy
# implementation defined earlier in this notebook; a plain import would
# rebind `PCA` and break re-running the earlier `PCA(data,2)` cell.
from sklearn.decomposition import PCA as SklearnPCA

# Same per-column standardization as the NumPy version (population std)
data_standard = (data - np.mean(data,axis=0)) / np.std(data, axis=0)
pca = SklearnPCA(n_components=2)
pca.fit(data_standard)

print("Eigenvectors:\n", pca.components_)
print("Eigenvalues:\n", pca.explained_variance_)
print("Transfromed Data:\n", pca.transform(data_standard))

# Fraction of the total variance explained by each kept component
print("Explain Variance Ratio:\n", pca.explained_variance_ratio_)

Eigenvectors:
 [[ 0.51936987 -0.50375745 -0.43631032  0.53489875]
 [-0.46975445 -0.5022783   0.57076007  0.44864263]]
Eigenvalues:
 [2.65096625 2.12315398]
Transfromed Data:
 [[-1.88551614 -1.26150583]
 [ 1.68874322  0.31652747]
 [-0.50238851 -0.88951369]
 [ 1.70973376 -0.55365747]
 [-1.01057233  2.38814952]]
Explain Variance Ratio:
 [0.53019325 0.4246308 ]
