## Partial Least Squares (PLS) Regression

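PLS regression combines ideas from principal component analysis and multiple regression: it projects the predictors onto a small number of latent components, chosen to covary strongly with the response, and then regresses the response on those components. It is particularly useful when the predictors are highly correlated with one another (multicollinearity).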

In [1]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Build a reproducible synthetic regression problem
np.random.seed(42)
n_samples, n_features = 100, 5
X = 2 * np.random.rand(n_samples, n_features)  # uniform draws scaled to [0, 2)
# Gaussian noise is drawn after X so the random stream is consumed in the same order
noise = 1.5 * np.random.randn(n_samples)
# Target: intercept of 4, one weighted term per feature, plus the noise
y = (
    4
    + 3 * X[:, 0]
    + 1.5 * X[:, 1]
    + 2 * X[:, 2]
    + 1.2 * X[:, 3]
    + 0.8 * X[:, 4]
    + noise
)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Learn the scaling statistics from the training split only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a PLS model with the chosen number of latent components
n_components = 3
pls_reg = PLSRegression(n_components=n_components).fit(X_train_scaled, y_train)

# Predict on the held-out split and report the mean squared error
y_pred = pls_reg.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"MSE: {mse}")

MSE: 2.1956468776001152
