Skip to content

Commit

Permalink
FIX Added store_cv_values to RidgeClassifierCV and a test. (#10297)
Browse files Browse the repository at this point in the history
  • Loading branch information
mabelvj authored and qinhanmin2014 committed Mar 15, 2018
1 parent 0679bbc commit eb1a3c4
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 21 deletions.
6 changes: 6 additions & 0 deletions doc/whats_new/v0.20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,12 @@ Classifiers and regressors
callable and b) the input to the NearestNeighbors model is sparse.
:issue:`9579` by :user:`Thomas Kober <tttthomasssss>`.

- Fixed a bug in :class:`linear_model.RidgeClassifierCV` where
  the parameter ``store_cv_values`` was not accepted by the constructor,
  even though the ``cv_values_`` attribute documented it as the way to
  enable storage of cross-validation values for each alpha.
  :issue:`10297` by :user:`Mabel Villalba-Jiménez <mabelvj>`.

- Fixed a bug in :class:`naive_bayes.MultinomialNB` which did not accept vector
valued pseudocounts (alpha).
:issue:`10346` by :user:`Tobias Madsen <TobiasMadsen>`
Expand Down
34 changes: 20 additions & 14 deletions sklearn/linear_model/ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,18 +1214,18 @@ class RidgeCV(_BaseRidgeCV, RegressorMixin):
store_cv_values : boolean, default=False
Flag indicating if the cross-validation values corresponding to
each alpha should be stored in the `cv_values_` attribute (see
below). This flag is only compatible with `cv=None` (i.e. using
each alpha should be stored in the ``cv_values_`` attribute (see
below). This flag is only compatible with ``cv=None`` (i.e. using
Generalized Cross-Validation).
Attributes
----------
cv_values_ : array, shape = [n_samples, n_alphas] or \
shape = [n_samples, n_targets, n_alphas], optional
Cross-validation values for each alpha (if `store_cv_values=True` and \
`cv=None`). After `fit()` has been called, this attribute will \
contain the mean squared errors (by default) or the values of the \
`{loss,score}_func` function (if provided in the constructor).
Cross-validation values for each alpha (if ``store_cv_values=True``\
and ``cv=None``). After ``fit()`` has been called, this attribute \
will contain the mean squared errors (by default) or the values \
of the ``{loss,score}_func`` function (if provided in the constructor).
coef_ : array, shape = [n_features] or [n_targets, n_features]
Weight vector(s).
Expand Down Expand Up @@ -1303,14 +1303,19 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``
store_cv_values : boolean, default=False
Flag indicating if the cross-validation values corresponding to
each alpha should be stored in the ``cv_values_`` attribute (see
below). This flag is only compatible with ``cv=None`` (i.e. using
Generalized Cross-Validation).
Attributes
----------
cv_values_ : array, shape = [n_samples, n_alphas] or \
shape = [n_samples, n_responses, n_alphas], optional
Cross-validation values for each alpha (if `store_cv_values=True` and
`cv=None`). After `fit()` has been called, this attribute will contain \
the mean squared errors (by default) or the values of the \
`{loss,score}_func` function (if provided in the constructor).
cv_values_ : array, shape = [n_samples, n_targets, n_alphas], optional
Cross-validation values for each alpha (if ``store_cv_values=True`` and
``cv=None``). After ``fit()`` has been called, this attribute will
contain the mean squared errors (by default) or the values of the
``{loss,score}_func`` function (if provided in the constructor).
coef_ : array, shape = [n_features] or [n_targets, n_features]
Weight vector(s).
Expand All @@ -1336,10 +1341,11 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
"""

def __init__(self, alphas=(0.1, 1.0, 10.0), fit_intercept=True,
             normalize=False, scoring=None, cv=None, class_weight=None,
             store_cv_values=False):
    """Initialize the Ridge classifier with built-in cross-validation.

    ``store_cv_values`` is forwarded to ``_BaseRidgeCV`` so that, when
    True (and ``cv=None``), the per-alpha cross-validation values are
    kept in the fitted ``cv_values_`` attribute; ``class_weight`` is
    stored locally because only the classifier uses it.
    """
    super(RidgeClassifierCV, self).__init__(
        alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,
        scoring=scoring, cv=cv, store_cv_values=store_cv_values)
    self.class_weight = class_weight

def fit(self, X, y, sample_weight=None):
Expand Down
38 changes: 31 additions & 7 deletions sklearn/linear_model/tests/test_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,8 +589,7 @@ def test_class_weights_cv():


def test_ridgecv_store_cv_values():
# Test _RidgeCV's store_cv_values attribute.
rng = rng = np.random.RandomState(42)
rng = np.random.RandomState(42)

n_samples = 8
n_features = 5
Expand All @@ -603,13 +602,38 @@ def test_ridgecv_store_cv_values():
# with len(y.shape) == 1
y = rng.randn(n_samples)
r.fit(x, y)
assert_equal(r.cv_values_.shape, (n_samples, n_alphas))
assert r.cv_values_.shape == (n_samples, n_alphas)

# with len(y.shape) == 2
n_targets = 3
y = rng.randn(n_samples, n_targets)
r.fit(x, y)
assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)


def test_ridge_classifier_cv_store_cv_values():
    # Check that RidgeClassifierCV honors store_cv_values: after fit,
    # cv_values_ must have shape (n_samples, n_targets, n_alphas) for
    # both a 1-D label vector and a 2-D multi-output target.
    x = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = np.array([1, 1, 1, -1, -1])

    n_samples = x.shape[0]
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)

    r = RidgeClassifierCV(alphas=alphas, store_cv_values=True)

    # with len(y.shape) == 1: binary labels are internally binarized to a
    # single target column, hence n_targets == 1
    n_targets = 1
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)

    # with len(y.shape) == 2: one target column per output
    y = np.array([[1, 1, 1, -1, -1],
                  [1, -1, 1, -1, 1],
                  [-1, -1, 1, -1, -1]]).transpose()
    n_targets = y.shape[1]
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)


def test_ridgecv_sample_weight():
Expand All @@ -632,7 +656,7 @@ def test_ridgecv_sample_weight():
gs = GridSearchCV(Ridge(), parameters, cv=cv)
gs.fit(X, y, sample_weight=sample_weight)

assert_equal(ridgecv.alpha_, gs.best_estimator_.alpha)
assert ridgecv.alpha_ == gs.best_estimator_.alpha
assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)


Expand Down

0 comments on commit eb1a3c4

Please sign in to comment.