MNT deprecate attributes in Partial Least Squares module #18768

Merged

7 changes: 7 additions & 0 deletions doc/whats_new/v0.24.rst
@@ -149,6 +149,13 @@ Changelog
retrieved by calling `transform` on the training data. The `norm_y_weights`
attribute will also be removed. :pr:`17095` by `Nicolas Hug`_.

- |API| For :class:`cross_decomposition.PLSRegression`,
:class:`cross_decomposition.PLSCanonical`,
:class:`cross_decomposition.CCA`, and
:class:`cross_decomposition.PLSSVD`, the `x_mean_`, `y_mean_`, `x_std_`, and
`y_std_` attributes were deprecated and will be removed in 0.26.
:pr:`18768` by :user:`Maren Westermann <marenwestermann>`.

:mod:`sklearn.datasets`
.......................

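For downstream code that still reads these attributes, the underlying quantities can be recomputed from the training data instead. A minimal sketch, assuming the default `scale=True` and the conventions of the private `_center_scale_xy` helper (column means; standard deviations with `ddof=1`; zero standard deviations treated as 1.0):

```python
import numpy as np
from sklearn.cross_decomposition import PLSRegression

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
Y = rng.randn(20, 3)

pls = PLSRegression(n_components=2).fit(X, Y)

# What x_mean_ / y_mean_ exposed: per-column means of the training data.
x_mean, y_mean = X.mean(axis=0), Y.mean(axis=0)

# What x_std_ / y_std_ exposed with scale=True: per-column standard deviations
# with ddof=1 (columns with zero variance are scaled by 1.0 internally).
x_std, y_std = X.std(axis=0, ddof=1), Y.std(axis=0, ddof=1)
```

With `scale=False`, the std attributes were simply arrays of ones.
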
84 changes: 70 additions & 14 deletions sklearn/cross_decomposition/_pls.py
@@ -215,7 +215,7 @@ def fit(self, X, Y):
norm_y_weights = self._norm_y_weights

# Scale (in place)
Xk, Yk, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = (
_center_scale_xy(X, Y, self.scale))

self.x_weights_ = np.zeros((p, n_components)) # U
@@ -294,7 +294,7 @@ def fit(self, X, Y):
check_finite=False))

self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
self.coef_ = self.coef_ * self.y_std_
self.coef_ = self.coef_ * self._y_std
return self

def transform(self, X, Y=None, copy=True):
@@ -318,16 +318,16 @@ def transform(self, X, Y=None, copy=True):
check_is_fitted(self)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
X /= self.x_std_
X -= self._x_mean
X /= self._x_std
# Apply rotation
x_scores = np.dot(X, self.x_rotations_)
if Y is not None:
Y = check_array(Y, ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
Y -= self.y_mean_
Y /= self.y_std_
Y -= self._y_mean
Y /= self._y_std
y_scores = np.dot(Y, self.y_rotations_)
return x_scores, y_scores

@@ -356,8 +356,8 @@ def inverse_transform(self, X):
X_reconstructed = np.matmul(X, self.x_loadings_.T)

# Denormalize
X_reconstructed *= self.x_std_
X_reconstructed += self.x_mean_
X_reconstructed *= self._x_std
X_reconstructed += self._x_mean
return X_reconstructed

def predict(self, X, copy=True):
@@ -380,10 +380,10 @@ def predict(self, X, copy=True):
check_is_fitted(self)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
X /= self.x_std_
X -= self._x_mean
X /= self._x_std
Ypred = np.dot(X, self.coef_)
return Ypred + self.y_mean_
return Ypred + self._y_mean

def fit_transform(self, X, y=None):
"""Learn and apply the dimension reduction on the train data.
@@ -412,6 +412,34 @@ def fit_transform(self, X, y=None):
def norm_y_weights(self):
return self._norm_y_weights

@deprecated( # type: ignore
"Attribute x_mean_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def x_mean_(self):
return self._x_mean

@deprecated( # type: ignore
"Attribute y_mean_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def y_mean_(self):
return self._y_mean

@deprecated( # type: ignore
"Attribute x_std_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def x_std_(self):
return self._x_std

@deprecated( # type: ignore
"Attribute y_std_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def y_std_(self):
return self._y_std

@property
def x_scores_(self):
# TODO: raise error in 0.26 instead
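
The four properties added above all follow the same shape: the fitted value lives in a private attribute, and a `@deprecated`-wrapped `@property` warns on every read. scikit-learn's `sklearn.utils.deprecation.deprecated` helper handles classes, functions and properties; the snippet below is only a rough stand-in for the property case, with `deprecated_property` a hypothetical helper rather than anything in scikit-learn:

```python
import warnings
from functools import wraps


def deprecated_property(message):
    """Hypothetical stand-in for @deprecated applied to a property: wrap the
    getter so every read emits a FutureWarning before delegating to it."""
    def decorate(prop):
        @property
        @wraps(prop.fget)
        def wrapper(self):
            warnings.warn(message, FutureWarning)
            return prop.fget(self)
        return wrapper
    return decorate


class Example:
    def __init__(self):
        self._x_mean = 0.0

    @deprecated_property("Attribute x_mean_ was deprecated in version 0.24 "
                         "and will be removed in 0.26.")
    @property
    def x_mean_(self):
        return self._x_mean


Example().x_mean_  # returns 0.0 and emits the FutureWarning
```

Keeping the getters as properties means existing attribute access continues to work until 0.26, only with a FutureWarning attached.
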
@@ -870,7 +898,7 @@ def fit(self, X, Y):
)
n_components = rank_upper_bound

X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = (
_center_scale_xy(X, Y, self.scale))

# Compute SVD of cross-covariance matrix
@@ -905,6 +933,34 @@ def x_scores_(self):
def y_scores_(self):
return self._y_scores

@deprecated( # type: ignore
"Attribute x_mean_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def x_mean_(self):
return self._x_mean

@deprecated( # type: ignore
"Attribute y_mean_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def y_mean_(self):
return self._y_mean

@deprecated( # type: ignore
"Attribute x_std_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def x_std_(self):
return self._x_std

@deprecated( # type: ignore
"Attribute y_std_ was deprecated in version 0.24 and "
"will be removed in 0.26.")
@property
def y_std_(self):
return self._y_std

def transform(self, X, Y=None):
"""
Apply the dimensionality reduction.
@@ -926,13 +982,13 @@
"""
check_is_fitted(self)
X = check_array(X, dtype=np.float64)
Xr = (X - self.x_mean_) / self.x_std_
Xr = (X - self._x_mean) / self._x_std
x_scores = np.dot(Xr, self.x_weights_)
if Y is not None:
Y = check_array(Y, ensure_2d=False, dtype=np.float64)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
Yr = (Y - self.y_mean_) / self.y_std_
Yr = (Y - self._y_mean) / self._y_std
y_scores = np.dot(Yr, self.y_weights_)
return x_scores, y_scores
return x_scores
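
The centering and scaling logic itself is unchanged; only the attribute names moved to private form. As a sanity check, the `transform` and `predict` paths above can be reproduced with plain NumPy from recomputed training statistics. A sketch, assuming `scale=True` and the `coef_` layout used in this version (shape `(n_features, n_targets)`):

```python
import numpy as np
from sklearn.cross_decomposition import PLSRegression

rng = np.random.RandomState(0)
X = rng.randn(30, 6)
Y = rng.randn(30, 2)

pls = PLSRegression(n_components=2).fit(X, Y)

# Training statistics, recomputed instead of read from the deprecated attributes.
x_mean, x_std = X.mean(axis=0), X.std(axis=0, ddof=1)
y_mean = Y.mean(axis=0)

# transform(): center and scale, then apply the rotation.
x_scores = ((X - x_mean) / x_std) @ pls.x_rotations_

# predict(): center and scale, apply coef_ (already rescaled by the Y std),
# then add the Y mean back.
y_pred = ((X - x_mean) / x_std) @ pls.coef_ + y_mean

np.testing.assert_allclose(x_scores, pls.transform(X))
np.testing.assert_allclose(y_pred, pls.predict(X))
```

Both checks rely only on public fitted attributes plus statistics recomputed from `X` and `Y`.
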
14 changes: 14 additions & 0 deletions sklearn/cross_decomposition/tests/test_pls.py
@@ -488,6 +488,20 @@ def test_norm_y_weights_deprecation(Est):
est.norm_y_weights


# TODO: Remove test in 0.26
@pytest.mark.parametrize('Estimator',
(PLSRegression, PLSCanonical, CCA, PLSSVD))
@pytest.mark.parametrize('attribute',
("x_mean_", "y_mean_", "x_std_", "y_std_"))
def test_mean_and_std_deprecation(Estimator, attribute):
rng = np.random.RandomState(0)
X = rng.randn(10, 5)
Y = rng.randn(10, 3)
estimator = Estimator().fit(X, Y)
with pytest.warns(FutureWarning, match=f"{attribute} was deprecated"):
getattr(estimator, attribute)


@pytest.mark.parametrize('n_samples, n_features', [(100, 10), (100, 200)])
@pytest.mark.parametrize('seed', range(10))
def test_singular_value_helpers(n_samples, n_features, seed):
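
The new test asserts the message with `pytest.warns`. Outside pytest, the same warning can be asserted, or temporarily silenced during migration, with the standard `warnings` machinery. A short sketch against a 0.24-era install:

```python
import warnings
import numpy as np
from sklearn.cross_decomposition import CCA

rng = np.random.RandomState(0)
cca = CCA(n_components=2).fit(rng.randn(10, 5), rng.randn(10, 3))

# Assert the warning without pytest by recording it.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    cca.x_std_
assert any(issubclass(w.category, FutureWarning)
           and "x_std_ was deprecated" in str(w.message) for w in caught)

# Silence it temporarily in code that has not been migrated yet.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    legacy_x_std = cca.x_std_
```
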
11 changes: 4 additions & 7 deletions sklearn/tests/test_docstring_parameters.py
@@ -183,13 +183,12 @@ def test_fit_docstring_attributes(name, Estimator):
doc = docscrape.ClassDoc(Estimator)
attributes = doc['Attributes']

IGNORED = {'CCA', 'ClassifierChain', 'ColumnTransformer',
IGNORED = {'ClassifierChain', 'ColumnTransformer',
'CountVectorizer', 'DictVectorizer', 'FeatureUnion',
'GaussianRandomProjection',
'MultiOutputClassifier', 'MultiOutputRegressor',
'NoSampleWeightWrapper', 'OneVsOneClassifier',
'OutputCodeClassifier', 'Pipeline', 'PLSCanonical',
'PLSRegression', 'PLSSVD', 'RFE', 'RFECV',
'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
'RegressorChain', 'SelectFromModel',
'SparseCoder', 'SparseRandomProjection',
'SpectralBiclustering', 'StackingClassifier',
@@ -252,10 +251,8 @@ def test_fit_docstring_attributes(name, Estimator):
with ignore_warnings(category=FutureWarning):
assert hasattr(est, attr.name)

IGNORED = {'Birch', 'CCA',
'LarsCV', 'Lasso',
'OrthogonalMatchingPursuit',
'PLSCanonical', 'PLSSVD'}
IGNORED = {'Birch', 'LarsCV', 'Lasso',
'OrthogonalMatchingPursuit'}

if Estimator.__name__ in IGNORED:
pytest.xfail(
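
Dropping `CCA`, `PLSCanonical`, `PLSRegression` and `PLSSVD` from the two `IGNORED` sets means `test_fit_docstring_attributes` now covers them. Roughly, the check verifies that every attribute documented in the class docstring exists on a fitted estimator. A simplified sketch, assuming `numpydoc` is installed and borrowing `ignore_warnings` from `sklearn.utils._testing` as the test module does:

```python
import numpy as np
from numpydoc import docscrape
from sklearn.cross_decomposition import PLSCanonical
from sklearn.utils._testing import ignore_warnings

doc = docscrape.ClassDoc(PLSCanonical)
documented = [attr.name for attr in doc["Attributes"]]

rng = np.random.RandomState(0)
est = PLSCanonical().fit(rng.randn(10, 5), rng.randn(10, 3))

# Deprecated attributes still resolve, so hasattr passes while the
# FutureWarning is filtered out, mirroring the test above.
with ignore_warnings(category=FutureWarning):
    missing = [name for name in documented if not hasattr(est, name)]

assert not missing, f"documented but absent after fit: {missing}"
```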