# I. Algorithm

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

In [None]:
import numpy as np
import pandas as pd

def pca(data, num_components=2):
    """
    Perform PCA on the given dataset via eigendecomposition of its covariance matrix.

    The data is mean-centered per feature before the decomposition. It is NOT
    rescaled to unit variance; standardize the input beforehand if the features
    are on different scales.

    Parameters:
    - data: 2D numpy array or pandas DataFrame where rows are samples and columns are features.
    - num_components: Number of principal components to return. If it exceeds the
      number of features, all available components are returned.

    Returns:
    - transformed_data: numpy array of shape (n_samples, k) with the centered data
      projected onto the selected principal components. The sign of each column is
      arbitrary (eigenvectors are only defined up to sign).
    - explained_variance_ratio: The fraction (0 to 1) of total variance explained by
      each of the selected principal components. Contains NaN when the data has
      zero total variance.

    Raises:
    - ValueError: if data is not 2-dimensional, has fewer than two samples, or
      num_components is negative.
    """
    samples = np.asarray(data, dtype=float)
    if samples.ndim != 2:
        raise ValueError(
            f"data must be 2-dimensional (samples x features), got {samples.ndim} dimension(s)"
        )
    if samples.shape[0] < 2:
        # The sample covariance divides by (n_samples - 1).
        raise ValueError("data must contain at least two samples")
    if num_components < 0:
        # A negative count would silently slice off trailing components instead.
        raise ValueError("num_components must be non-negative")

    # Center the data (mean = 0 for every feature); variance is left untouched
    data_meaned = samples - samples.mean(axis=0)

    # Compute the covariance matrix. np.cov collapses a single-feature input to
    # a 0-d array, so force it back to 2D for the eigendecomposition.
    covariance_matrix = np.atleast_2d(np.cov(data_meaned, rowvar=False))

    # Compute the eigenvalues and eigenvectors (eigh: the matrix is symmetric)
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Sort the eigenvalues and eigenvectors in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues_sorted = eigenvalues[sorted_indices]
    eigenvectors_sorted = eigenvectors[:, sorted_indices]

    # Select the top 'num_components' eigenvectors
    eigenvectors_selected = eigenvectors_sorted[:, :num_components]

    # Project the data onto the selected eigenvectors
    transformed_data = np.dot(data_meaned, eigenvectors_selected)

    # Explained variance ratio
    explained_variance_ratio = eigenvalues_sorted[:num_components] / np.sum(eigenvalues_sorted)

    return transformed_data, explained_variance_ratio



# II. Application to Data Set