Allow saving grid search results via a new --save-results option
Also, during grid search, just print GridSearch::cv_results_, and
thereby remove an assumption that only one scorer was used.
dnouri committed Apr 20, 2018
1 parent d55639f commit 87f44a4
Showing 3 changed files with 88 additions and 56 deletions.
44 changes: 34 additions & 10 deletions docs/user/tutorial.rst
@@ -279,16 +279,40 @@ Try running ``pld-grid-search`` and see what happens:
At the end, you should see something like this output::
  [mean: 0.95000, std: 0.05138, params: {'C': 1.0},
   mean: 0.91000, std: 0.05022, params: {'C': 0.3},
   mean: 0.84000, std: 0.06408, params: {'C': 0.1}]
What happened? We just tried out three different values for *C*,
and used a three-fold cross-validation to determine the best setting.
The first line is the winner. It tells us that the mean
cross-validation accuracy of the model with *C* set to ``1.0`` is
``0.95`` and that the standard deviation between accuracies in the
cross-validation folds is ``0.05138``.
     mean_fit_time  mean_score_time  mean_test_score  mean_train_score param_C  \
  2       0.000811         0.000268             0.95          0.954831       1
  1       0.001456         0.000426             0.91          0.924974     0.3
  0       0.002270         0.001272             0.84          0.835621     0.1

         params  rank_test_score  split0_test_score  split0_train_score  \
  2  {'C': 1.0}                1           1.000000            0.938462
  1  {'C': 0.3}                2           0.971429            0.923077
  0  {'C': 0.1}                3           0.914286            0.876923

     split1_test_score  split1_train_score  split2_test_score  \
  2           0.878788            0.970149            0.96875
  1           0.848485            0.925373            0.90625
  0           0.757576            0.835821            0.84375

     split2_train_score  std_fit_time  std_score_time  std_test_score  \
  2            0.955882      0.000148        0.000048        0.051585
  1            0.926471      0.000659        0.000089        0.050734
  0            0.794118      0.000016        0.000751        0.064636

     std_train_score
  2         0.012958
  1         0.001414
  0         0.033805
What happened? We just tried out three different values for *C*, and
used three-fold cross-validation to determine the best setting. The
first row is the winner. It tells us that the mean cross-validation
accuracy of the model with *C* set to ``1.0`` (``params``) is ``0.95``
(``mean_test_score``) and that the standard deviation between
accuracies in the cross-validation folds is ``0.051585``
(``std_test_score``).
You can also ask to save these results by passing a CSV filename to
the ``--save-results`` option.
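For example (a minimal sketch, not part of this commit; the file name
``results.csv`` is only an assumption), the saved file can be loaded
back with pandas for further analysis::

  import pandas

  # Assumes a previous run of: pld-grid-search --save-results=results.csv
  results = pandas.read_csv('results.csv')

  # Best-ranked candidates come first, as in the printed table above.
  print(results.sort_values('rank_test_score').head())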
Let us take a look at the configuration of ``grid_search``:
27 changes: 15 additions & 12 deletions palladium/fit.py
@@ -6,6 +6,7 @@

from datetime import datetime
from docopt import docopt
import pandas
from pprint import pformat
from sklearn.metrics import get_scorer
from sklearn.model_selection import GridSearchCV
@@ -160,7 +161,8 @@ def admin_cmd(argv=sys.argv[1:]):  # pragma: no cover


@args_from_config
def grid_search(dataset_loader_train, model, grid_search, scoring=None):
def grid_search(dataset_loader_train, model, grid_search, scoring=None,
                save_results=None):
    with timer(logger.info, "Loading data"):
        X, y = dataset_loader_train()

@@ -194,15 +196,15 @@ def grid_search(dataset_loader_train, model, grid_search, scoring=None):
    gs = GridSearchCV(model, **grid_search_kwargs)
    gs.fit(X, y)

    scores = []
    means = gs.cv_results_['mean_test_score']
    stds = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        scores.append("mean: {0:.5f}, std: {1:.5f}, params: {2}".format(
            mean, std, param))
    logger.info('\n{}'.format(
        pformat(sorted(scores, reverse=True)).replace('"', '')))
    return scores
    results = pandas.DataFrame(gs.cv_results_)
    if save_results:
        results.to_csv(save_results, index=False)
    pandas.options.display.max_rows = len(results)
    pandas.options.display.max_columns = len(results.columns)
    if 'rank_test_score' in results:
        results = results.sort_values('rank_test_score')
    print(results)
    return gs
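
# Editor's aside, not part of the commit: ``cv_results_`` is a plain dict
# of equal-length columns, which is why it converts directly into a
# pandas DataFrame.  A self-contained sketch with made-up numbers:
#
#     import pandas
#     cv_results = {
#         'mean_test_score': [0.84, 0.91, 0.95],
#         'std_test_score': [0.064636, 0.050734, 0.051585],
#         'params': [{'C': 0.1}, {'C': 0.3}, {'C': 1.0}],
#         'rank_test_score': [3, 2, 1],
#     }
#     results = pandas.DataFrame(cv_results)
#     print(results.sort_values('rank_test_score'))  # best rank first
#     results.to_csv('results.csv', index=False)     # as --save-results does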


def grid_search_cmd(argv=sys.argv[1:]): # pragma: no cover
@@ -217,8 +219,9 @@ def grid_search_cmd(argv=sys.argv[1:]):  # pragma: no cover
      pld-grid-search [options]

    Options:
      --save-results=<fname>  Save results to CSV file
      -h --help               Show this screen.
    """
    docopt(grid_search_cmd.__doc__, argv=argv)
    arguments = docopt(grid_search_cmd.__doc__, argv=argv)
    initialize_config(__mode__='fit')
    grid_search()
    grid_search(save_results=arguments['--save-results'])
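
A quick sketch of the docopt wiring above (self-contained and hedged: the
usage string mirrors the docstring, and the argv value is invented):

    from docopt import docopt

    usage = """
    Usage:
      pld-grid-search [options]

    Options:
      --save-results=<fname>  Save results to CSV file
      -h --help               Show this screen.
    """

    # docopt exposes the option as a dict entry keyed by its long name.
    arguments = docopt(usage, argv=['--save-results=grid.csv'])
    assert arguments['--save-results'] == 'grid.csv'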
73 changes: 39 additions & 34 deletions palladium/tests/test_fit.py
@@ -6,6 +6,7 @@
from unittest.mock import patch

from dateutil.parser import parse
import pandas
import pytest


@@ -237,41 +238,47 @@ def test_delete():


class TestGridSearch:
    @pytest.fixture
    def GridSearchCVWithScores(self, monkeypatch):
        scores = {
            'mean_test_score': [0.1, 0.2],
            'std_test_score': [0.06463643, 0.05073433],
            'params': [{'C': 0.1}, {'C': 0.3}],
            'rank_test_score': [1, 2],
        }

        GridSearchCV = Mock()
        monkeypatch.setattr('palladium.fit.GridSearchCV', GridSearchCV)
        GridSearchCV().cv_results_ = scores
        return GridSearchCV

    @pytest.fixture
    def grid_search(self):
        from palladium.fit import grid_search
        return grid_search
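
    # Editor's note, not part of the commit: the fixture above works because
    # a Mock returns the same child object on every call, so GridSearchCV()
    # inside palladium.fit.grid_search yields the instance whose cv_results_
    # is configured here.  Standard unittest.mock behavior:
    #
    #     from unittest.mock import Mock
    #     GridSearchCV = Mock()
    #     assert GridSearchCV() is GridSearchCV()  # same return_value object
    #     GridSearchCV().cv_results_ = {'rank_test_score': [1]}
    #     assert GridSearchCV().cv_results_['rank_test_score'] == [1]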

    def test_it(self, grid_search):
    def test_it(self, grid_search, GridSearchCVWithScores, capsys, tmpdir):
        model, dataset_loader_train = Mock(), Mock()
        grid_search_params = {'verbose': 4}
        X, y = object(), object()
        dataset_loader_train.return_value = X, y
        scores = {
            'mean_test_score': [0.1, 0.2],
            'std_test_score': [0.06463643, 0.05073433],
            'params': [{'C': 0.1}, {'C': 0.3}]}

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            GridSearchCV().cv_results_ = scores
            result = grid_search(
                dataset_loader_train, model, grid_search_params)

        expected = []
        expected.append("mean: {0:.5f}, std: {1:.5f}, params: {2}"
                        .format(
                            scores['mean_test_score'][0],
                            scores['std_test_score'][0],
                            scores['params'][0]))
        expected.append("mean: {0:.5f}, std: {1:.5f}, params: {2}"
                        .format(
                            scores['mean_test_score'][1],
                            scores['std_test_score'][1],
                            scores['params'][1]))
        assert result == expected
        results_csv = tmpdir.join('results.csv')
        result = grid_search(
            dataset_loader_train=dataset_loader_train,
            model=model,
            grid_search=grid_search_params,
            save_results=str(results_csv),
        )
        dataset_loader_train.assert_called_with()
        GridSearchCV.assert_called_with(model, refit=False, verbose=4)
        GridSearchCV().fit.assert_called_with(X, y)
        GridSearchCVWithScores.assert_called_with(model, refit=False, verbose=4)
        GridSearchCVWithScores().fit.assert_called_with(X, y)
        assert result is GridSearchCVWithScores()
        scores = GridSearchCVWithScores().cv_results_
        assert (str(pandas.DataFrame(scores)).strip() ==
                capsys.readouterr()[0].strip())
        assert (str(pandas.DataFrame(scores)).strip() ==
                str(pandas.read_csv(str(results_csv))).strip())

    def test_no_score_method_raises(self, grid_search):
        model, dataset_loader_train = Mock(spec=['fit', 'predict']), Mock()
@@ -288,30 +295,28 @@ def test_two_scores_raises(self, grid_search):
            grid_search(dataset_loader_train, model,
                        {'scoring': 'f1'}, scoring='accuracy')

    def test_two_scores_priority(self, grid_search):
    def test_two_scores_priority(self, grid_search, GridSearchCVWithScores):
        # 'scoring' has higher priority than 'model.score'
        model = Mock(spec=['fit', 'predict', 'score'])
        dataset_loader_train = Mock()
        scoring = Mock()
        dataset_loader_train.return_value = object(), object()

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            grid_search(dataset_loader_train, model, {}, scoring=scoring)
            GridSearchCV.assert_called_with(
        grid_search(dataset_loader_train, model, {}, scoring=scoring)
        GridSearchCVWithScores.assert_called_with(
            model, refit=False, scoring=scoring)

    def test_deprecated_scoring(self, grid_search):
    def test_deprecated_scoring(self, grid_search, GridSearchCVWithScores):
        # 'scoring' inside of 'grid_search' is deprecated
        model = Mock(spec=['fit', 'predict', 'score'])
        dataset_loader_train = Mock()
        scoring = Mock()
        dataset_loader_train.return_value = object(), object()

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            with pytest.warns(DeprecationWarning):
                grid_search(dataset_loader_train, model,
                            {'scoring': scoring}, scoring=None)
            GridSearchCV.assert_called_with(
        with pytest.warns(DeprecationWarning):
            grid_search(dataset_loader_train, model,
                        {'scoring': scoring}, scoring=None)
        GridSearchCVWithScores.assert_called_with(
            model, refit=False, scoring=scoring)

    def test_grid_search(self, grid_search):