[MRG + 2] Print train scores in _fit_and_score (#12613)
* print train scores when verbose > 3 in _fit_and_score

* train_scores computed if verbose > 3, and 3 decimal places only

* flake8 warnings solved

* print train score if return_train_score
test coverage increased

* added test for multimetric;
tests cleaned using pytest parametrize

* fixing failed tests for python2...

* revert changes in _scorer

* modified whats_new

* modified whats_new again
marctorsoc authored and amueller committed Nov 22, 2018
1 parent 12384a1 commit f6f7e3c
Showing 3 changed files with 71 additions and 7 deletions.
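
For context, the new output is reached through the public CV helpers that delegate to _fit_and_score. A minimal sketch of how to observe it (the toy dataset, estimator, and the example scores in the comment are illustrative, not part of this commit):

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC

X, y = make_classification(n_samples=30, random_state=0)
clf = SVC(kernel="linear", random_state=0)

# With verbose > 2 and return_train_score=True, each fold now logs both
# scores, e.g. "[CV] ... , score=(train=0.963, test=0.933), total= 0.0s";
# with return_train_score=False the line keeps the single-score form.
cross_validate(clf, X, y, cv=3, verbose=3, return_train_score=True)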
7 changes: 7 additions & 0 deletions doc/whats_new/v0.21.rst
@@ -81,6 +81,13 @@ Support for Python 3.4 and below has been officially dropped.
   metrics such as recall, specificity, fall out and miss rate.
   :issue:`11179` by :user:`Shangwu Yao <ShangwuYao>` and `Joel Nothman`_.
 
+:mod:`sklearn.model_selection`
+..............................
+
+- |Enhancement| Method :func:`_fit_and_score` now prints train scores when
+  `return_train_score` is True and `verbose` > 2.
+  :issue:`12613` by :user:`Marc Torrellas <marctorrellas>`.
+
 :mod:`sklearn.neighbors`
 ........................
 
16 changes: 11 additions & 5 deletions sklearn/model_selection/_validation.py
@@ -504,7 +504,6 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
 
     is_multimetric = not callable(scorer)
     n_scorers = len(scorer.keys()) if is_multimetric else 1
-
     try:
         if y_train is None:
             estimator.fit(X_train, **fit_params)
@@ -554,13 +553,20 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
         if return_train_score:
             train_scores = _score(estimator, X_train, y_train, scorer,
                                   is_multimetric)
-
     if verbose > 2:
         if is_multimetric:
-            for scorer_name, score in test_scores.items():
-                msg += ", %s=%s" % (scorer_name, score)
+            for scorer_name in sorted(test_scores):
+                msg += ", %s=" % scorer_name
+                if return_train_score:
+                    msg += "(train=%.3f," % train_scores[scorer_name]
+                    msg += " test=%.3f)" % test_scores[scorer_name]
+                else:
+                    msg += "%.3f" % test_scores[scorer_name]
         else:
-            msg += ", score=%s" % test_scores
+            msg += ", score="
+            msg += ("%.3f" % test_scores if not return_train_score else
+                    "(train=%.3f, test=%.3f)" % (train_scores, test_scores))
+
     if verbose > 1:
         total_time = score_time + fit_time
         end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
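
To make the two message shapes concrete, the hunk above boils down to this standalone sketch (format_scores_msg is a hypothetical helper written for illustration, not sklearn API):

# Hypothetical stand-in for the verbose formatting added above.
def format_scores_msg(test_scores, train_scores=None, is_multimetric=False):
    msg = ""
    if is_multimetric:
        # One chunk per scorer, sorted by name for stable output.
        for scorer_name in sorted(test_scores):
            msg += ", %s=" % scorer_name
            if train_scores is not None:
                msg += "(train=%.3f," % train_scores[scorer_name]
                msg += " test=%.3f)" % test_scores[scorer_name]
            else:
                msg += "%.3f" % test_scores[scorer_name]
    else:
        msg += ", score="
        msg += ("%.3f" % test_scores if train_scores is None else
                "(train=%.3f, test=%.3f)" % (train_scores, test_scores))
    return msg

print(format_scores_msg(0.933))                      # ", score=0.933"
print(format_scores_msg(0.933, train_scores=0.963))  # ", score=(train=0.963, test=0.933)"

Sorting the scorer names gives deterministic output across Python versions (dict iteration order was not guaranteed before Python 3.7), which is what lets the multimetric test below assert an exact line.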
55 changes: 53 additions & 2 deletions sklearn/model_selection/tests/test_validation.py
@@ -29,7 +29,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.mocking import CheckingClassifier, MockDataFrame
 
-from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import cross_val_score, ShuffleSplit
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_validate
 from sklearn.model_selection import permutation_test_score
@@ -44,6 +44,7 @@
 from sklearn.model_selection import validation_curve
 from sklearn.model_selection._validation import _check_is_permutation
 from sklearn.model_selection._validation import _fit_and_score
+from sklearn.model_selection._validation import _score
 
 from sklearn.datasets import make_regression
 from sklearn.datasets import load_boston
@@ -1477,7 +1478,7 @@ def test_permutation_test_score_pandas():
         permutation_test_score(clf, X_df, y_ser)
 
 
-def test_fit_and_score():
+def test_fit_and_score_failing():
     # Create a failing classifier to deliberately fail
     failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
     # dummy X data
@@ -1537,3 +1538,53 @@ def test_fit_and_score():
                          error_score='unvalid-string')
 
     assert_equal(failing_clf.score(), 0.)  # FailingClassifier coverage
+
+
+def test_fit_and_score_working():
+    X, y = make_classification(n_samples=30, random_state=0)
+    clf = SVC(kernel="linear", random_state=0)
+    train, test = next(ShuffleSplit().split(X))
+    # Test return_parameters option
+    fit_and_score_args = [clf, X, y, dict(), train, test, 0]
+    fit_and_score_kwargs = {'parameters': {'max_iter': 100, 'tol': 0.1},
+                            'fit_params': None,
+                            'return_parameters': True}
+    result = _fit_and_score(*fit_and_score_args,
+                            **fit_and_score_kwargs)
+    assert result[-1] == fit_and_score_kwargs['parameters']
+
+
+def three_params_scorer(i, j, k):
+    return 3.4213
+
+
+@pytest.mark.parametrize("return_train_score, scorer, expected", [
+    (False, three_params_scorer,
+     "[CV] .................................... , score=3.421, total= 0.0s"),
+    (True, three_params_scorer,
+     "[CV] ................ , score=(train=3.421, test=3.421), total= 0.0s"),
+    (True, {'sc1': three_params_scorer, 'sc2': three_params_scorer},
+     "[CV] , sc1=(train=3.421, test=3.421)"
+     ", sc2=(train=3.421, test=3.421), total= 0.0s")
+])
+def test_fit_and_score_verbosity(capsys, return_train_score, scorer, expected):
+    X, y = make_classification(n_samples=30, random_state=0)
+    clf = SVC(kernel="linear", random_state=0)
+    train, test = next(ShuffleSplit().split(X))
+
+    # test print without train score
+    fit_and_score_args = [clf, X, y, scorer, train, test, 10, None, None]
+    fit_and_score_kwargs = {'return_train_score': return_train_score}
+    _fit_and_score(*fit_and_score_args, **fit_and_score_kwargs)
+    out, _ = capsys.readouterr()
+    assert out.split('\n')[1] == expected
+
+
+def test_score():
+    error_message = "scoring must return a number, got None"
+
+    def two_params_scorer(estimator, X_test):
+        return None
+    fit_and_score_args = [None, None, None, two_params_scorer]
+    assert_raise_message(ValueError, error_message,
+                         _score, *fit_and_score_args)
