In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
import numpy as np

In [2]:
# Load the iris feature matrix X and the target vector y.
X, y = load_iris(return_X_y=True)

### Implementation of PCA from scratch

In [3]:
class myPCA:
    '''
        Principal Component Analysis implemented with NumPy.

        Two strategies are available:
          * 'svd'   - SVD of the mean-centred data (covariance-based PCA).
          * 'eigen' - eigen-decomposition of the correlation matrix
                      (PCA on standardised features).
    '''

    def __init__(self, n_components: int = 2, method: str = 'svd') -> None:
        '''
            The constructor of the PCA algorithm.
        :param n_components: int, default = 2
            The number of dimensions the data will be reduced to.
        :param method: str, default = 'svd'
            The way used by PCA to reduce the dimensionality of the data,
            either 'svd' or 'eigen'.
        :raises ValueError:
            If method is not one of the implemented strategies.
        '''

        self.__n_components = n_components
        if method in ('svd', 'eigen'):
            self.__method = method
        else:
            raise ValueError (f" '{method}' isn't a method implemented in this model")

    def fit(self, X: np.ndarray) -> 'myPCA':
        '''
            The fitting method.
        :param X: np.ndarray of shape (n_samples, n_features)
            The data on which we want to fit the pca.
        :return: myPCA
            The fitted instance itself, for both methods, so calls can be chained.
        '''
        X = np.asarray(X, dtype=float)

        # Principal directions are defined relative to the data mean, so the
        # mean is remembered and removed both here and in transform().
        self.__mean = X.mean(axis=0)
        centered = X - self.__mean

        if self.__method == 'svd':
            # full_matrices=False skips the (n_samples x n_samples) U matrix,
            # which is never used; the rows of Vt are the principal axes.
            _, _, Vt = np.linalg.svd(centered, full_matrices=False)
            self.__V = Vt[: self.__n_components, :]
        elif self.__method == 'eigen':
            # Population standard deviation (ddof=0). Zero-variance features
            # are left unscaled to avoid a division by zero.
            scale = centered.std(axis=0)
            scale[scale == 0] = 1.0
            self.__scale = scale

            # Correlation matrix of the features, built from standardised data.
            standardized = centered / scale
            corr_mat = standardized.T.dot(standardized) / X.shape[0]

            # Getting the eigenvectors and eigenvalues. The matrix is
            # symmetric, so eigh is used: it returns real values in ascending
            # order, which is flipped to get the largest eigenvalue first.
            eig_vals, eig_vecs = np.linalg.eigh(corr_mat)
            order = np.argsort(eig_vals)[::-1]
            self.eig_vals = eig_vals[order]
            self.eig_vecs = eig_vecs[:, order]

            # List of (eigenvalue, eigenvector) tuples, largest eigenvalue first
            self.eig_pairs = [(np.abs(self.eig_vals[i]), self.eig_vecs[:, i])
                              for i in range(len(self.eig_vals))]

            # Calculating the explained variance ratio (in percent)
            total = self.eig_vals.sum()
            self.explained_variance_ratio = (self.eig_vals / total * 100).tolist()
            self.cumulative_variance_ratio = np.cumsum(self.explained_variance_ratio)

            # Creating the projection matrix (n_features x n_components)
            self.matrix_w = self.eig_vecs[:, : self.__n_components]
            # Alias kept for code that reads the original attribute name.
            self.matriz_w = self.matrix_w
        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        '''
            The transform function.
        :param X: np.ndarray of shape (n_samples, n_features)
            The data that we must reduce.
        :return: np.ndarray
            The data projected onto the components learned by fit().
        '''
        X = np.asarray(X, dtype=float)

        if self.__method == 'svd':
            return (X - self.__mean).dot(self.__V.T)
        elif self.__method == 'eigen':
            # Same standardisation as in fit(), because the components come
            # from the correlation matrix.
            return ((X - self.__mean) / self.__scale).dot(self.matrix_w)

In [4]:
#Instantiating a PCA object
PCA = myPCA()

In [5]:
#Fitting fuction on X matrix with the features
PCA.fit(X)

In [6]:
#Transforming the data into a new reduced matrix pca_x 
pca_x = PCA.transform(X)

In [7]:
pca_x.shape

(150, 2)

### Implementation of PCA from sklearn

In [8]:
# PCA implementation shipped with scikit-learn.
from sklearn.decomposition import PCA

# Keep the first two principal components.
pca = PCA(n_components=2)
# Learn the components from the feature matrix X.
pca.fit(X)

PCA(n_components=2)

In [9]:
# Project X onto the components learned by `pca.fit(X)`.
pca_X = pca.transform(X)

In [10]:
# Shape check: one row per sample, one column per retained component (2).
pca_X.shape

(150, 2)