### Principal Component Analysis (PCA)

In [None]:
import numpy as np

def pca(X, num_components):
    """Project data onto its top principal components.

    Args:
        X: Array-like of shape (n_samples, n_features). Rows are samples,
            columns are features.
        num_components: Number of principal components to keep. Must be
            between 0 and n_features inclusive.

    Returns:
        A tuple ``(X_transformed, W, eigenvalues)``:
            X_transformed: Centered data projected onto the kept components,
                shape (n_samples, num_components).
            W: Projection matrix whose columns are the kept eigenvectors,
                shape (n_features, num_components).
            eigenvalues: ALL eigenvalues of the covariance matrix (not only
                the top ``num_components``), sorted in descending order,
                shape (n_features,).

    Raises:
        ValueError: If X is not 2-D, has fewer than 2 samples, or if
            num_components is negative or exceeds the number of features.
    """
    # 1. Input validation
    X = np.asarray(X)  # accept lists / nested sequences as well as arrays
    if X.ndim != 2:
        raise ValueError("X must be a 2D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if num_components > n_features:
        raise ValueError("num_components cannot be greater than the number of features")
    if num_components < 0:
        # A negative k would silently slice off columns from the end instead
        # of selecting the top-k components.
        raise ValueError("num_components cannot be negative")
    if n_samples < 2:
        # The unbiased covariance divides by (n_samples - 1).
        raise ValueError("X must contain at least 2 samples")

    # 2. Center the data
    X_mean = np.mean(X, axis=0)
    X_centered = X - X_mean

    # 3. Compute covariance matrix (ddof=1 for the unbiased estimate).
    # rowvar=False tells np.cov that columns are the variables (features).
    # With a single feature np.cov returns a 0-d array, which eigh rejects,
    # so force the result back to a (n_features, n_features) matrix.
    cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False, ddof=1))

    # 4. Compute eigenvalues and eigenvectors.
    # eigh is used because the covariance matrix is symmetric: it returns
    # real eigenvalues and is more stable than the general eig.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # 5. Sort eigenvalues and eigenvectors in descending order of eigenvalue
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]          # shape: (n_features,)
    eigenvectors = eigenvectors[:, idx]     # shape: (n_features, n_features)

    # 6. Select top-k eigenvectors (columns) as the projection matrix
    W = eigenvectors[:, :num_components]

    # 7. Transform the data
    X_transformed = np.dot(X_centered, W)

    return X_transformed, W, eigenvalues

# Example usage: four 2-D samples lying on a straight line, reduced to one
# principal component.
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
X_pca, components, explained_var = pca(X, num_components=1)

# Print each result next to its label.
for label, value in (
    ("PCA Result:", X_pca),
    ("Components:", components),
    ("Explained variance:", explained_var),
):
    print(label, value)

PCA Result: [[-2.12132034]
 [-0.70710678]
 [ 0.70710678]
 [ 2.12132034]]
Components: [[0.70710678]
 [0.70710678]]
Explained variance: [3.33333333 0.        ]
