# Weighted Partial Least Squares Regression (WPLS)

In [14]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
import numpy as np
from weighted_pls import weighted_pls

## How to use WPLS

In [15]:
# Synthetic regression problem: only the first two of five features drive Y,
# plus a small uniform noise term. Seeded so the printed numbers are reproducible.
np.random.seed(42)
N = 100
X = np.random.rand(N, 5)
Y = X[:, :1] * 3 + X[:, 1:2] * 2 + 0.1 * np.random.rand(N, 1)
# One integer weight in {1, 2, 3, 4} per sample.
weights = np.random.randint(low=1, high=5, size=N)

# Train the weighted model on the weighted samples, then predict on the same X.
wpls = weighted_pls.WeightedPLSRegression(n_components=2)
wpls.fit(X, Y, sample_weight=weights)
Y_pred_wpls = wpls.predict(X)

print("WPLS MSE:", mean_squared_error(y_true=Y, y_pred=Y_pred_wpls))

WPLS MSE: 0.003289513210378584


## Comparison with other implementations

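Two pairs of results (MSE, coefficients, and intercept) are expected to match: standard PLS versus WPLS with all weights equal to 1 (w=1), and WPLS versus "Repeated PLS", i.e. standard PLS fitted on data in which each sample is repeated as many times as its integer weight.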

If the results do not match, please report the issue on the repository's page.

In [16]:
# Baseline: unweighted PLS from scikit-learn.
pls = PLSRegression(n_components=2).fit(X, Y)
Y_pred = pls.predict(X)

# WPLS with unit weights -- expected to reproduce the baseline above.
wpls_1 = weighted_pls.WeightedPLSRegression(n_components=2)
wpls_1.fit(X, Y, sample_weight=np.full(N, 1.0))
Y_pred_wpls_1 = wpls_1.predict(X)

# Emulate integer weights with plain PLS by duplicating each row `weights[i]`
# times -- expected to reproduce the weighted WPLS fit.
X_repeated = X.repeat(weights, axis=0)
Y_repeated = Y.repeat(weights, axis=0)
pls_repeated = PLSRegression(n_components=2).fit(X_repeated, Y_repeated)
# Predictions are made on the original (non-duplicated) X.
Y_pred_repeated = pls_repeated.predict(X)



In [17]:
# All four errors are measured against the original Y; emitted one per line.
print(
    "MSE Comparison:",
    f"PLS MSE: {mean_squared_error(Y, Y_pred):.6f}",
    f"WPLS (w=1) MSE: {mean_squared_error(Y, Y_pred_wpls_1):.6f}",
    f"Repeated PLS MSE: {mean_squared_error(Y, Y_pred_repeated):.6f}",
    f"WPLS MSE: {mean_squared_error(Y, Y_pred_wpls):.6f}",
    sep="\n",
)

MSE Comparison:
PLS MSE: 0.003110
WPLS (w=1) MSE: 0.003110
Repeated PLS MSE: 0.003290
WPLS MSE: 0.003290


In [18]:
print("\nCoefficient Comparison:")
# Labels are padded to a common width so the coefficient vectors line up.
for label, model in (
    ("PLS coef:           ", pls),
    ("WPLS (w=1) coef:    ", wpls_1),
    ("Repeated PLS coef:  ", pls_repeated),
    ("WPLS coef:          ", wpls),
):
    print(label, np.round(model.coef_.flatten(), decimals=6))


Coefficient Comparison:
PLS coef:            [ 2.916738e+00  2.042514e+00  1.786000e-03  7.682300e-02 -1.409620e-01]
WPLS (w=1) coef:     [ 2.916738e+00  2.042514e+00  1.786000e-03  7.682300e-02 -1.409620e-01]
Repeated PLS coef:   [ 2.927226  2.033077 -0.065518  0.065072 -0.152955]
WPLS coef:           [ 2.927226  2.033077 -0.065518  0.065072 -0.152955]


In [19]:
# Single-target problem, so only the first intercept entry of each model is shown.
intercept_report = [
    "\nIntercept Comparison:",
    f"PLS intercept: {pls.intercept_[0]:.6f}",
    f"WPLS (w=1) intercept: {wpls_1.intercept_[0]:.6f}",
    f"Repeated PLS intercept: {pls_repeated.intercept_[0]:.6f}",
    f"WPLS intercept: {wpls.intercept_[0]:.6f}",
]
print("\n".join(intercept_report))


Intercept Comparison:
PLS intercept: 2.617127
WPLS (w=1) intercept: 2.617127
Repeated PLS intercept: 2.579659
WPLS intercept: 2.579659
