## Principal Component Regression (PCR):

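PCR is a technique that combines principal component analysis (PCA) with linear regression. The independent variables are first transformed into principal components, and a linear regression is then fitted on a subset of those components (typically the leading few, which capture most of the variance) instead of on the original variables.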

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Build a reproducible synthetic regression problem
np.random.seed(42)

# 100 samples x 5 features, each feature uniform on [0, 2)
X = 2 * np.random.rand(100, 5)

# Gaussian noise with standard deviation 1.5, drawn after X so the random stream order is fixed
noise = 1.5 * np.random.randn(100)

# The target is linear in the features: intercept plus one weight per column.
# The terms are accumulated left to right, starting from the intercept.
true_intercept = 4
true_weights = (3, 1.5, 2, 1.2, 0.8)
y = sum((weight * X[:, col] for col, weight in enumerate(true_weights)), true_intercept) + noise

# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Learn the principal axes from the training split only; the test split is
# projected onto these same axes further down.
pca = PCA()
X_train_pca = pca.fit_transform(X_train)

# Number of leading principal components used as regression inputs
n_components = 3
X_train_pca_selected = X_train_pca[:, :n_components]

# Regress the target on the retained component scores
regressor = LinearRegression().fit(X_train_pca_selected, y_train)

# Project the test split with the already-fitted PCA, then keep the same leading components
X_test_pca_all = pca.transform(X_test)
X_test_pca = X_test_pca_all[:, :n_components]

# Predict on the held-out split and score the predictions with mean squared error
y_pred = regressor.predict(X_test_pca)
mse = mean_squared_error(y_test, y_pred)
print(f"MSE: {mse}")

# Note: For a more realistic example, you may want to use real-world data.

# Report the share of variance explained by each component, individually and cumulatively
explained_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratio:")
print(explained_ratio)
print("Cumulative Explained Variance:")
print(np.cumsum(explained_ratio))


MSE: 4.699129074609967
Explained Variance Ratio:
[0.28363871 0.23175484 0.19111374 0.15515175 0.13834096]
Cumulative Explained Variance:
[0.28363871 0.51539354 0.70650728 0.86165904 1.        ]
