In [4]:
import numpy as np

class PrincipalComponentRegression:
    """Principal component regression (PCR).

    The features are centered, projected onto the leading eigenvectors of
    their sample covariance matrix, and an ordinary least-squares model with
    an intercept is fitted on the projected scores.

    Attributes set by ``fit``:
        coefficients: least-squares solution; index 0 is the intercept and
            the remaining entries are the weights of the retained components.
        eigenvalues: covariance eigenvalues in descending order.
        eigenvectors: matching eigenvectors stored as columns. Their signs
            are arbitrary, so the signs of the component weights are too.
        feature_means: per-feature means of the training data, reused by
            ``predict`` to center new data.
    """

    def __init__(self, n_components):
        """Create an unfitted model that keeps ``n_components`` components."""
        self.n_components = n_components
        self.coefficients = None
        self.eigenvalues = None
        self.eigenvectors = None
        self.feature_means = None

    def _design_matrix(self, X):
        """Center X with the training means, project it, prepend a ones column."""
        scores = (X - self.feature_means).dot(
            self.eigenvectors[:, :self.n_components]
        )
        return np.column_stack((np.ones(len(scores)), scores))

    def fit(self, X, y):
        """Estimate the principal components of X and regress y on them.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: response values, one entry (or row) per sample.
        """
        X = np.asarray(X, dtype=float)

        # Remember the training means: predict() must apply the very same
        # shift, otherwise the fitted intercept no longer matches the data.
        self.feature_means = X.mean(axis=0)
        X_centered = X - self.feature_means

        # np.cov returns a 0-d array for a single feature; atleast_2d keeps
        # the eigendecomposition below working in that case.
        covariance_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # The covariance matrix is symmetric, so use eigh: it guarantees real
        # eigenvalues/eigenvectors, whereas eig may return complex values
        # caused purely by round-off.
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

        # Order the components from largest to smallest variance.
        order = np.argsort(eigenvalues)[::-1]
        self.eigenvalues = eigenvalues[order]
        self.eigenvectors = eigenvectors[:, order]

        # Least-squares fit on [1, scores]; coefficients[0] is the intercept.
        self.coefficients = np.linalg.lstsq(
            self._design_matrix(X), y, rcond=None
        )[0]

    def predict(self, X):
        """Predict responses for X using the fitted components.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array of predictions, one per row of X.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.coefficients is None or self.feature_means is None:
            raise RuntimeError("fit must be called before predict")

        X = np.asarray(X, dtype=float)
        return self._design_matrix(X).dot(self.coefficients)


# Example usage
# Build a reproducible synthetic data set from the seeded global NumPy RNG
np.random.seed(0)
X_train = np.random.rand(100, 5)  # 100 training samples, 5 features
y_train = np.random.rand(100)     # one response per training sample
X_test = np.random.rand(50, 5)    # 50 samples to predict on

# Fit a PCR model that keeps the first three principal components
n_components = 3
pcr = PrincipalComponentRegression(n_components)
pcr.fit(X_train, y_train)

# Predict the response for the test features
y_pred = pcr.predict(X_test)

# Report the component weights (index 0 holds the bias term, so skip it)
print("Regression Coefficients:", pcr.coefficients[1:], sep="\n")

# Report each retained component's share of the total variance
explained_variance_ratio = pcr.eigenvalues / pcr.eigenvalues.sum()
print(
    "Variance Explained by Principal Components:",
    explained_variance_ratio[:n_components],
    sep="\n",
)


Regression Coefficients:
[-0.03317929 -0.06024463 -0.03888201]
Variance Explained by Principal Components:
[0.2679184  0.22563357 0.20109877]
