From f6f7e3cfd365b39d4bc5a98cc8914c3a528477d6 Mon Sep 17 00:00:00 2001 From: Marc Torrellas Date: Thu, 22 Nov 2018 01:22:24 +0000 Subject: [PATCH] [MRG + 2] Print train scores in _fit_and_score (#12613) * print train scores when verbose > 3 in _fit_and_score * train_scores computed if verbose > 3, and 3 decimals places only * flake8 warnings solved * print train score if return_train_score test coverage increased * added test for multimetric; tests cleaned using pytest parametrize * fixing failed tests for python2... * revert changes in _scorer * modified whats_new * modified whats_new again --- doc/whats_new/v0.21.rst | 7 +++ sklearn/model_selection/_validation.py | 16 ++++-- .../model_selection/tests/test_validation.py | 55 ++++++++++++++++++- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index c56dbc2144b3f..eef1017e71cdb 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -81,6 +81,13 @@ Support for Python 3.4 and below has been officially dropped. metrics such as recall, specificity, fall out and miss rate. :issue:`11179` by :user:`Shangwu Yao ` and `Joel Nothman`_. +:mod:`sklearn.model_selection` .............................. +- |Enhancement| Method :func:`_fit_and_score` now prints train_scores when + `return_train_score` is True and `verbose` > 2. + :issue:`12613` by :user:`Marc Torrellas `. + :mod:`sklearn.neighbors` ........................ 
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 125b610098404..5db2f29297d86 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -504,7 +504,6 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, is_multimetric = not callable(scorer) n_scorers = len(scorer.keys()) if is_multimetric else 1 - try: if y_train is None: estimator.fit(X_train, **fit_params) @@ -554,13 +553,20 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, if return_train_score: train_scores = _score(estimator, X_train, y_train, scorer, is_multimetric) - if verbose > 2: if is_multimetric: - for scorer_name, score in test_scores.items(): - msg += ", %s=%s" % (scorer_name, score) + for scorer_name in sorted(test_scores): + msg += ", %s=" % scorer_name + if return_train_score: + msg += "(train=%.3f," % train_scores[scorer_name] + msg += " test=%.3f)" % test_scores[scorer_name] + else: + msg += "%.3f" % test_scores[scorer_name] else: - msg += ", score=%s" % test_scores + msg += ", score=" + msg += ("%.3f" % test_scores if not return_train_score else + "(train=%.3f, test=%.3f)" % (train_scores, test_scores)) + if verbose > 1: total_time = score_time + fit_time end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time)) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index a7352972173fc..7816293ebb3b4 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -29,7 +29,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.mocking import CheckingClassifier, MockDataFrame -from sklearn.model_selection import cross_val_score +from sklearn.model_selection import cross_val_score, ShuffleSplit from sklearn.model_selection import cross_val_predict from sklearn.model_selection import cross_validate from sklearn.model_selection 
import permutation_test_score @@ -44,6 +44,7 @@ from sklearn.model_selection import validation_curve from sklearn.model_selection._validation import _check_is_permutation from sklearn.model_selection._validation import _fit_and_score +from sklearn.model_selection._validation import _score from sklearn.datasets import make_regression from sklearn.datasets import load_boston @@ -1477,7 +1478,7 @@ def test_permutation_test_score_pandas(): permutation_test_score(clf, X_df, y_ser) -def test_fit_and_score(): +def test_fit_and_score_failing(): # Create a failing classifier to deliberately fail failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER) # dummy X data @@ -1537,3 +1538,53 @@ def test_fit_and_score(): error_score='unvalid-string') assert_equal(failing_clf.score(), 0.) # FailingClassifier coverage + + +def test_fit_and_score_working(): + X, y = make_classification(n_samples=30, random_state=0) + clf = SVC(kernel="linear", random_state=0) + train, test = next(ShuffleSplit().split(X)) + # Test return_parameters option + fit_and_score_args = [clf, X, y, dict(), train, test, 0] + fit_and_score_kwargs = {'parameters': {'max_iter': 100, 'tol': 0.1}, + 'fit_params': None, + 'return_parameters': True} + result = _fit_and_score(*fit_and_score_args, + **fit_and_score_kwargs) + assert result[-1] == fit_and_score_kwargs['parameters'] + + +def three_params_scorer(i, j, k): + return 3.4213 + + +@pytest.mark.parametrize("return_train_score, scorer, expected", [ + (False, three_params_scorer, + "[CV] .................................... , score=3.421, total= 0.0s"), + (True, three_params_scorer, + "[CV] ................ 
, score=(train=3.421, test=3.421), total= 0.0s"), (True, {'sc1': three_params_scorer, 'sc2': three_params_scorer}, "[CV] , sc1=(train=3.421, test=3.421)" ", sc2=(train=3.421, test=3.421), total= 0.0s") ]) def test_fit_and_score_verbosity(capsys, return_train_score, scorer, expected): X, y = make_classification(n_samples=30, random_state=0) clf = SVC(kernel="linear", random_state=0) train, test = next(ShuffleSplit().split(X)) + + # check the printed progress line (train scores shown when requested) + fit_and_score_args = [clf, X, y, scorer, train, test, 10, None, None] + fit_and_score_kwargs = {'return_train_score': return_train_score} + _fit_and_score(*fit_and_score_args, **fit_and_score_kwargs) + out, _ = capsys.readouterr() + assert out.split('\n')[1] == expected + + +def test_score(): + error_message = "scoring must return a number, got None" + + def two_params_scorer(estimator, X_test): + return None + fit_and_score_args = [None, None, None, two_params_scorer] + assert_raise_message(ValueError, error_message, + _score, *fit_and_score_args)