Allow saving grid search results via a new --save-results option
Also, during grid search, just print GridSearch::cv_results_, and
thereby remove an assumption that only one scorer was used.
dnouri committed Apr 20, 2018
1 parent d55639f commit 87f44a4
Showing 3 changed files with 88 additions and 56 deletions.
44 changes: 34 additions & 10 deletions docs/user/tutorial.rst
@@ -279,16 +279,40 @@ Try running ``pld-grid-search`` and see what happens:
At the end, you should see something like this output::
  [mean: 0.95000, std: 0.05138, params: {'C': 1.0},
   mean: 0.91000, std: 0.05022, params: {'C': 0.3},
   mean: 0.84000, std: 0.06408, params: {'C': 0.1}]
What happened? We just tried out three different values for *C*,
and used a three-fold cross-validation to determine the best setting.
The first line is the winner. It tells us that the mean
cross-validation accuracy of the model with *C* set to ``1.0`` is
``0.95`` and that the standard deviation between accuracies in the
cross-validation folds is ``0.05138``.
     mean_fit_time  mean_score_time  mean_test_score  mean_train_score param_C  \
  2       0.000811         0.000268             0.95          0.954831       1
  1       0.001456         0.000426             0.91          0.924974     0.3
  0       0.002270         0.001272             0.84          0.835621     0.1

         params  rank_test_score  split0_test_score  split0_train_score  \
  2  {'C': 1.0}                1           1.000000            0.938462
  1  {'C': 0.3}                2           0.971429            0.923077
  0  {'C': 0.1}                3           0.914286            0.876923

     split1_test_score  split1_train_score  split2_test_score  \
  2           0.878788            0.970149            0.96875
  1           0.848485            0.925373            0.90625
  0           0.757576            0.835821            0.84375

     split2_train_score  std_fit_time  std_score_time  std_test_score  \
  2            0.955882      0.000148        0.000048        0.051585
  1            0.926471      0.000659        0.000089        0.050734
  0            0.794118      0.000016        0.000751        0.064636

     std_train_score
  2         0.012958
  1         0.001414
  0         0.033805
What happened? We just tried out three different values for *C*, and
used three-fold cross-validation to determine the best setting. The
first row is the winner. It tells us that the mean cross-validation
accuracy of the model with *C* set to ``1.0`` (``params``) is ``0.95``
(``mean_test_score``) and that the standard deviation between
accuracies in the cross-validation folds is ``0.051585``
(``std_test_score``).
You can also ask to save these results by passing a CSV filename to
the ``--save-results`` option.
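For example (a minimal sketch, not part of this commit; the file name
``results.csv`` is only an assumption), the saved file can be loaded
back with pandas for further analysis::

  import pandas

  # Assumes a previous run of: pld-grid-search --save-results=results.csv
  results = pandas.read_csv('results.csv')

  # Best-ranked candidates come first, as in the printed table above.
  print(results.sort_values('rank_test_score').head())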
Let us take a look at the configuration of ``grid_search``:
27 changes: 15 additions & 12 deletions palladium/fit.py
@@ -6,6 +6,7 @@

from datetime import datetime
from docopt import docopt
import pandas
from pprint import pformat
from sklearn.metrics import get_scorer
from sklearn.model_selection import GridSearchCV
@@ -160,7 +161,8 @@ def admin_cmd(argv=sys.argv[1:]):  # pragma: no cover


@args_from_config
def grid_search(dataset_loader_train, model, grid_search, scoring=None):
def grid_search(dataset_loader_train, model, grid_search, scoring=None,
                save_results=None):
    with timer(logger.info, "Loading data"):
        X, y = dataset_loader_train()

@@ -194,15 +196,15 @@ def grid_search(dataset_loader_train, model, grid_search, scoring=None):
    gs = GridSearchCV(model, **grid_search_kwargs)
    gs.fit(X, y)

    scores = []
    means = gs.cv_results_['mean_test_score']
    stds = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        scores.append("mean: {0:.5f}, std: {1:.5f}, params: {2}".format(
            mean, std, param))
    logger.info('\n{}'.format(
        pformat(sorted(scores, reverse=True)).replace('"', '')))
    return scores
    results = pandas.DataFrame(gs.cv_results_)
    if save_results:
        results.to_csv(save_results, index=False)
    pandas.options.display.max_rows = len(results)
    pandas.options.display.max_columns = len(results.columns)
    if 'rank_test_score' in results:
        results = results.sort_values('rank_test_score')
    print(results)
    return gs
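
# Editor's aside, not part of the commit: ``cv_results_`` is a plain dict
# of equal-length columns, which is why it converts directly into a
# pandas DataFrame.  A self-contained sketch with made-up numbers:
#
#     import pandas
#     cv_results = {
#         'mean_test_score': [0.84, 0.91, 0.95],
#         'std_test_score': [0.064636, 0.050734, 0.051585],
#         'params': [{'C': 0.1}, {'C': 0.3}, {'C': 1.0}],
#         'rank_test_score': [3, 2, 1],
#     }
#     results = pandas.DataFrame(cv_results)
#     print(results.sort_values('rank_test_score'))  # best rank first
#     results.to_csv('results.csv', index=False)     # as --save-results does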


def grid_search_cmd(argv=sys.argv[1:]): # pragma: no cover
@@ -217,8 +219,9 @@ def grid_search_cmd(argv=sys.argv[1:]):  # pragma: no cover
      pld-grid-search [options]

    Options:
      --save-results=<fname>  Save results to CSV file
      -h --help               Show this screen.
    """
    docopt(grid_search_cmd.__doc__, argv=argv)
    arguments = docopt(grid_search_cmd.__doc__, argv=argv)
    initialize_config(__mode__='fit')
    grid_search()
    grid_search(save_results=arguments['--save-results'])
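
A quick sketch of the docopt wiring above (self-contained and hedged: the
usage string mirrors the docstring, and the argv value is invented):

    from docopt import docopt

    usage = """
    Usage:
      pld-grid-search [options]

    Options:
      --save-results=<fname>  Save results to CSV file
      -h --help               Show this screen.
    """

    # docopt exposes the option as a dict entry keyed by its long name.
    arguments = docopt(usage, argv=['--save-results=grid.csv'])
    assert arguments['--save-results'] == 'grid.csv'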
73 changes: 39 additions & 34 deletions palladium/tests/test_fit.py
@@ -6,6 +6,7 @@
from unittest.mock import patch

from dateutil.parser import parse
import pandas
import pytest


@@ -237,41 +238,47 @@ def test_delete():


class TestGridSearch:
    @pytest.fixture
    def GridSearchCVWithScores(self, monkeypatch):
        scores = {
            'mean_test_score': [0.1, 0.2],
            'std_test_score': [0.06463643, 0.05073433],
            'params': [{'C': 0.1}, {'C': 0.3}],
            'rank_test_score': [1, 2],
        }

        GridSearchCV = Mock()
        monkeypatch.setattr('palladium.fit.GridSearchCV', GridSearchCV)
        GridSearchCV().cv_results_ = scores
        return GridSearchCV

    @pytest.fixture
    def grid_search(self):
        from palladium.fit import grid_search
        return grid_search
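
    # Editor's note, not part of the commit: the fixture above works because
    # a Mock returns the same child object on every call, so GridSearchCV()
    # inside palladium.fit.grid_search yields the instance whose cv_results_
    # is configured here.  Standard unittest.mock behavior:
    #
    #     from unittest.mock import Mock
    #     GridSearchCV = Mock()
    #     assert GridSearchCV() is GridSearchCV()  # same return_value object
    #     GridSearchCV().cv_results_ = {'rank_test_score': [1]}
    #     assert GridSearchCV().cv_results_['rank_test_score'] == [1]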

    def test_it(self, grid_search):
    def test_it(self, grid_search, GridSearchCVWithScores, capsys, tmpdir):
        model, dataset_loader_train = Mock(), Mock()
        grid_search_params = {'verbose': 4}
        X, y = object(), object()
        dataset_loader_train.return_value = X, y
        scores = {
            'mean_test_score': [0.1, 0.2],
            'std_test_score': [0.06463643, 0.05073433],
            'params': [{'C': 0.1}, {'C': 0.3}]}

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            GridSearchCV().cv_results_ = scores
            result = grid_search(
                dataset_loader_train, model, grid_search_params)

        expected = []
        expected.append("mean: {0:.5f}, std: {1:.5f}, params: {2}"
                        .format(
                            scores['mean_test_score'][0],
                            scores['std_test_score'][0],
                            scores['params'][0]))
        expected.append("mean: {0:.5f}, std: {1:.5f}, params: {2}"
                        .format(
                            scores['mean_test_score'][1],
                            scores['std_test_score'][1],
                            scores['params'][1]))
        assert result == expected
        results_csv = tmpdir.join('results.csv')
        result = grid_search(
            dataset_loader_train=dataset_loader_train,
            model=model,
            grid_search=grid_search_params,
            save_results=str(results_csv),
        )
        dataset_loader_train.assert_called_with()
        GridSearchCV.assert_called_with(model, refit=False, verbose=4)
        GridSearchCV().fit.assert_called_with(X, y)
        GridSearchCVWithScores.assert_called_with(model, refit=False, verbose=4)
        GridSearchCVWithScores().fit.assert_called_with(X, y)
        assert result is GridSearchCVWithScores()
        scores = GridSearchCVWithScores().cv_results_
        assert (str(pandas.DataFrame(scores)).strip() ==
                capsys.readouterr()[0].strip())
        assert (str(pandas.DataFrame(scores)).strip() ==
                str(pandas.read_csv(str(results_csv))).strip())

    def test_no_score_method_raises(self, grid_search):
        model, dataset_loader_train = Mock(spec=['fit', 'predict']), Mock()
@@ -288,30 +295,28 @@ def test_two_scores_raises(self, grid_search):
            grid_search(dataset_loader_train, model,
                        {'scoring': 'f1'}, scoring='accuracy')

    def test_two_scores_priority(self, grid_search):
    def test_two_scores_priority(self, grid_search, GridSearchCVWithScores):
        # 'scoring' has higher priority than 'model.score'
        model = Mock(spec=['fit', 'predict', 'score'])
        dataset_loader_train = Mock()
        scoring = Mock()
        dataset_loader_train.return_value = object(), object()

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            grid_search(dataset_loader_train, model, {}, scoring=scoring)
            GridSearchCV.assert_called_with(
        grid_search(dataset_loader_train, model, {}, scoring=scoring)
        GridSearchCVWithScores.assert_called_with(
            model, refit=False, scoring=scoring)

    def test_deprecated_scoring(self, grid_search):
    def test_deprecated_scoring(self, grid_search, GridSearchCVWithScores):
        # 'scoring' inside of 'grid_search' is deprecated
        model = Mock(spec=['fit', 'predict', 'score'])
        dataset_loader_train = Mock()
        scoring = Mock()
        dataset_loader_train.return_value = object(), object()

        with patch('palladium.fit.GridSearchCV') as GridSearchCV:
            with pytest.warns(DeprecationWarning):
                grid_search(dataset_loader_train, model,
                            {'scoring': scoring}, scoring=None)
            GridSearchCV.assert_called_with(
        with pytest.warns(DeprecationWarning):
            grid_search(dataset_loader_train, model,
                        {'scoring': scoring}, scoring=None)
        GridSearchCVWithScores.assert_called_with(
            model, refit=False, scoring=scoring)

    def test_grid_search(self, grid_search):