<a href="https://colab.research.google.com/github/mobarakol/tutorial_notebooks/blob/main/PCA_Reduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PCA from scratch:

In [8]:
#Importing required libraries
import numpy as np

#Generate a dummy dataset.
X = np.random.randint(10,50,100).reshape(20,5)
# mean Centering the data
X_meaned = X - np.mean(X , axis = 0)

# calculating the covariance matrix of the mean-centered data.
cov_mat = np.cov(X_meaned , rowvar = False)

#Calculating Eigenvalues and Eigenvectors of the covariance matrix
eigen_values , eigen_vectors = np.linalg.eigh(cov_mat)

#sort the eigenvalues in descending order
sorted_index = np.argsort(eigen_values)[::-1]

sorted_eigenvalue = eigen_values[sorted_index]
#similarly sort the eigenvectors
sorted_eigenvectors = eigen_vectors[:,sorted_index]

# select the first n eigenvectors, n is desired dimension
# of our final reduced data.

n_components = 2 #you can select any number of components.
eigenvector_subset = sorted_eigenvectors[:,0:n_components]

#Transform the data
X_reduced = np.dot(eigenvector_subset.transpose(),X_meaned.transpose()).transpose()
X.shape, X_reduced.shape

((20, 5), (20, 2))

Exact PCA from SKLEARN:

In [20]:
from sklearn.decomposition import PCA
X = np.random.randint(10,50,100).reshape(20,5)
pca = PCA(n_components = 2)
fit = pca.fit(X)
X_reduced = fit.components_
X.shape, X_reduced.shape

((20, 5), (2, 5))

In [22]:
from sklearn.decomposition import PCA
X = np.random.randint(10,50,100).reshape(20,5)
X = X.transpose(1,0)
pca = PCA(n_components = 2)
fit = pca.fit(X)
X_reduced = fit.components_
X.shape, X_reduced.shape

((5, 20), (2, 20))

Incremental PCA:

In [23]:
from sklearn.datasets import load_digits
from sklearn.decomposition import IncrementalPCA

X = np.random.randint(10,50,100).reshape(20,5)
pca = IncrementalPCA(n_components = 2)
fit = pca.fit(X)
X_reduced = fit.components_
X.shape, X_reduced.shape

((20, 5), (2, 5))

In [26]:
from sklearn.decomposition import IncrementalPCA

X = np.random.randint(10,50,100).reshape(20,5)
X = X.transpose(1,0)
pca = IncrementalPCA(n_components = 2)
X_transformed = pca.fit_transform(X)
X.shape, X_transformed.shape

((5, 20), (5, 2))

Kernel PCA:

In [28]:
from sklearn.decomposition import KernelPCA

X = np.random.randint(10,50,100).reshape(20,5)
X = X.transpose(1,0)
pca = KernelPCA(n_components = 2,  kernel = 'sigmoid')
X_transformed = pca.fit_transform(X)
X.shape, X_transformed.shape

((5, 20), (5, 2))

PCA using randomized SVD:

In [30]:
from sklearn.decomposition import KernelPCA

X = np.random.randint(10,50,100).reshape(20,5)
pca = PCA(n_components = 2, svd_solver = 'randomized')
fit = pca.fit(X)
X_reduced = fit.components_
X.shape, X_reduced.shape

((20, 5), (2, 5))

SVD for dimension reduction:

In [32]:
from sklearn.decomposition import TruncatedSVD
X = np.random.randint(10,50,100).reshape(20,5)
X = X.transpose(1,0)
svd = TruncatedSVD(n_components=2)
X_transformed = pca.fit_transform(X)
X.shape, X_transformed.shape

((5, 20), (5, 2))