In [1]:
import numpy as np

class PCA:
    """Principal component analysis via eigendecomposition of the covariance.

    Attributes set by ``fit``:
        components: array of shape (p, k) whose columns are the leading
            eigenvectors of the sample covariance matrix, ordered by
            decreasing eigenvalue (k = min(n_components, p)).
        mean: per-feature mean of the training data, shape (p,).
        explained_variance_: the k largest eigenvalues of the sample
            covariance matrix (unbiased, i.e. normalised by n - 1).

    Note: the sign of each eigenvector is not normalised, so projected
    coordinates are only defined up to a per-component sign flip.
    """

    def __init__(self, n_components):
        """Store the number of principal components to keep."""
        self.n_components = n_components
        self.components = None
        self.mean = None
        self.explained_variance_ = None

    def fit(self, X):
        """Estimate the principal axes from ``X``.

        Args:
            X: 2-D data with samples in rows and features in columns,
                shape (n, p).

        Returns:
            self, so that calls can be chained.
        """
        # 1. Center the data by subtracting the mean
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # 2. Calculate the covariance matrix
        # For n samples and p features, shape will be (p x p).
        # np.cov collapses the single-feature case (p == 1) to a 0-d
        # scalar, which np.linalg.eigh rejects; atleast_2d restores the
        # (1 x 1) matrix and is a no-op for p >= 2.
        cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # 3. Calculate eigenvalues and eigenvectors of covariance matrix
        # eigh is used because the covariance matrix is symmetric.
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # 4. Sort eigenvalues and eigenvectors in descending order
        idx = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]

        # 5. Store first n_components eigenvectors (as columns)
        self.components = eigenvectors[:, :self.n_components]

        # 6. Store explained variance
        self.explained_variance_ = eigenvalues[:self.n_components]

        return self

    def transform(self, X):
        """Project ``X`` onto the principal axes found by ``fit``.

        ``fit`` must have been called first; otherwise ``self.mean`` is
        still ``None`` and the subtraction below fails.

        Args:
            X: 2-D data of shape (m, p) with the same p features used in
                ``fit``.

        Returns:
            Array of shape (m, k) with the coordinates of each sample in
            the principal-component basis.
        """
        # Center the data using mean from fit
        X_centered = X - self.mean

        # Project data onto principal components
        X_transformed = np.dot(X_centered, self.components)

        return X_transformed