### Principal Component Analysis (PCA)

In [None]:
import numpy as np

def pca(data, k):
    """Return the top-``k`` principal components of ``data``.

    Each feature (column) is standardized to zero mean and unit variance,
    the covariance matrix of the standardized data is eigen-decomposed, and
    the eigenvectors belonging to the ``k`` largest eigenvalues are kept.

    Args:
        data: 2-D array-like of shape (n_samples, n_features); each row is
            an observation and each column is a variable.
        k: Number of principal components to keep, ``1 <= k <= n_features``.

    Returns:
        A nested list of shape (n_features, k) whose columns are the
        selected eigenvectors, rounded to 4 decimals. The sign of each
        column is whatever ``np.linalg.eig`` produces.

    Raises:
        ValueError: If ``data`` is not 2-D, has fewer than two samples, or
            ``k`` is outside ``1..n_features``.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError("data must be 2-D with shape (n_samples, n_features)")
    n_samples, n_features = data.shape
    if n_samples < 2:
        # The sample covariance is undefined for a single observation.
        raise ValueError("data must contain at least two samples")
    if not 1 <= k <= n_features:
        # A negative k would otherwise silently slice components off the end.
        raise ValueError("k must be between 1 and the number of features")

    # Standardize the data. A constant column has zero standard deviation;
    # dividing by it would produce NaN, so such columns are only centered.
    std = np.std(data, axis=0)
    std[std == 0] = 1.0
    data_standardized = (data - np.mean(data, axis=0)) / std

    # Compute the covariance matrix. `rowvar=False` means each column of the
    # input represents a variable and each row represents an observation.
    # np.cov collapses the single-feature case to a 0-d array, so restore
    # the (n_features, n_features) shape that the decomposition expects.
    covariance_matrix = np.atleast_2d(np.cov(data_standardized, rowvar=False))

    # Eigen decomposition. A covariance matrix is symmetric, so its
    # eigenvalues are real; np.real drops any imaginary rounding residue.
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
    eigenvalues = np.real(eigenvalues)
    eigenvectors = np.real(eigenvectors)

    # Sort the eigenvectors by decreasing eigenvalues
    idx = np.argsort(eigenvalues)[::-1]
    eigenvectors_sorted = eigenvectors[:, idx]

    # Select the top k eigenvectors (principal components)
    principal_components = eigenvectors_sorted[:, :k]

    return np.round(principal_components, 4).tolist()

# Example usage: print the leading principal components of two small datasets.
three_feature_samples = np.array([[4, 2, 1], [5, 6, 7], [9, 12, 1], [4, 6, 7]])
two_feature_samples = np.array([[1, 2], [3, 4], [5, 6]])

print(pca(three_feature_samples, 2))
print(pca(two_feature_samples, k=1))

[[0.6855, 0.0776], [0.6202, 0.4586], [-0.3814, 0.8853]]
[[0.7071], [0.7071]]


In [5]:
# import numpy as np

# def pca(X, num_components):
#     # 1. Input validation
#     if num_components > X.shape[1]:
#         raise ValueError("num_components cannot be greater than the number of features")

#     # 2. Center the data
#     X_mean = np.mean(X, axis=0)
#     X_centered = X - X_mean

#     # 3. Compute covariance matrix
#     cov_matrix = np.cov(X_centered, rowvar=False)  # rowvar=False indicates that each column represents a variable

#     # 4. Compute eigenvalues and eigenvectors
#     eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)  # Using eigh ensures that the eigenvalues are sorted in ascending order

#     # 5. Sort eigenvalues and eigenvectors in descending order
#     idx = np.argsort(eigenvalues)[::-1]   # Sort eigenvalues in descending order
#     eigenvalues = eigenvalues[idx]        # Eigenvalues is a 1D array, Shape: (n_features,)
#     eigenvectors = eigenvectors[:, idx]   # Eigenvectors is a 2D array, Shape: (n_features, n_features)

#     # 6. Select top k eigenvectors
#     principal_components = eigenvectors[:, :num_components]  # Select top-k eigenvectors

#     # 7. Transform the data
#     X_transformed = np.dot(X_centered, principal_components)

#     return X_transformed, principal_components, eigenvalues

# # Example usage
# X = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
# X_pca, components, explained_var = pca(X, 1)
# print("PCA Result:", X_pca)
# print("Components:", components)
# print("Explained variance:", explained_var)