[MRG+3] ENH Restructure grid_scores_ into a dict of 1D (numpy) (masked) arrays that can be imported into pandas as a DataFrame. #6697

Merged 5 commits on Jun 16, 2016

11 changes: 8 additions & 3 deletions doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py
@@ -53,9 +53,14 @@
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1)
grid_search.fit(docs_train, y_train)

# TASK: print the cross-validated scores for the each parameters set
# explored by the grid search
print(grid_search.grid_scores_)
# TASK: print the mean and std for each candidate along with the parameter
# settings for all the candidates explored by grid search.
n_candidates = len(grid_search.results_['params'])
for i in range(n_candidates):
    print(i, 'params - %s; mean - %0.2f; std - %0.2f'
          % (grid_search.results_['params'][i],
             grid_search.results_['test_mean_score'][i],
             grid_search.results_['test_std_score'][i]))

# TASK: Predict the outcome on the testing set and store it in a variable
# named y_predicted
17 changes: 9 additions & 8 deletions doc/tutorial/text_analytics/working_with_text_data.rst
@@ -446,21 +446,22 @@ that we can use to ``predict``::
>>> twenty_train.target_names[gs_clf.predict(['God is love'])]
'soc.religion.christian'

but otherwise, it's a pretty large and clumsy object. We can, however, get the
optimal parameters out by inspecting the object's ``grid_scores_`` attribute,
which is a list of parameters/score pairs. To get the best scoring attributes,
we can do::
The object's ``best_score_`` and ``best_params_`` attributes store the best
mean score and the parameter setting corresponding to that score::

>>> best_parameters, score, _ = max(gs_clf.grid_scores_, key=lambda x: x[1])
>>> gs_clf.best_score_
0.900...
>>> for param_name in sorted(parameters.keys()):
... print("%s: %r" % (param_name, best_parameters[param_name]))
... print("%s: %r" % (param_name, gs_clf.best_params_[param_name]))
...
clf__alpha: 0.001
tfidf__use_idf: True
vect__ngram_range: (1, 1)

>>> score # doctest: +ELLIPSIS
0.900...
A more detailed summary of the search is available in ``gs_clf.results_``.

The ``results_`` attribute can be easily imported into pandas as a
``DataFrame`` for further inspection.
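
For illustration, a minimal sketch of that import (a hedged example, not part
of the merged diff; it assumes ``pandas`` is installed and that ``gs_clf`` is
the fitted search object from this tutorial)::

>>> import pandas as pd
>>> df = pd.DataFrame(gs_clf.results_)   # one row per search candidate
>>> df[['params', 'test_mean_score', 'test_std_score']]  # doctest: +SKIP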

.. note::

94 changes: 81 additions & 13 deletions doc/whats_new.rst
@@ -12,6 +12,57 @@ Version 0.18
Changelog
---------

.. _model_selection_changes:

Model Selection Enhancements and API Changes
--------------------------------------------

- **The ``model_selection`` module**

The new module :mod:`sklearn.model_selection` groups together the
functionality of the former :mod:`cross_validation`, :mod:`grid_search` and
:mod:`learning_curve` modules, and introduces new possibilities such as
nested cross-validation and better manipulation of parameter searches with
Pandas.

Many things stay the same, but there are some key differences; read below to
know more about the changes. A combined sketch follows this list.

- **Data-independent CV splitters enabling nested cross-validation**

The new cross-validation splitters, defined in
:mod:`sklearn.model_selection`, are no longer initialized with any
data-dependent parameters such as ``y``. Instead they expose a
:func:`split` method that takes in the data and yields a generator for the
different splits.

This change makes it possible to use the cross-validation splitters to
perform nested cross-validation, facilitated by the
:class:`model_selection.GridSearchCV` and
:class:`model_selection.RandomizedSearchCV` utilities.

- **The enhanced ``results_`` attribute**

The new ``results_`` attribute (of :class:`model_selection.GridSearchCV`
and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
``grid_scores_`` attribute is a dict of 1D arrays with elements in each
array corresponding to the parameter settings (i.e. search candidates).

The ``results_`` dict can be easily imported into ``pandas`` as a
``DataFrame`` for exploring the search results.

The ``results_`` arrays include scores for each cross-validation split
(with keys such as ``test_split0_score``), as well as their mean
(``test_mean_score``) and standard deviation (``test_std_score``).

The ranks of the search candidates (based on their mean
cross-validation score) are available at ``results_['test_rank_score']``.

The values for each parameter are stored separately as numpy masked
object arrays: for a given search candidate, the value is masked if the
corresponding parameter is not applicable. Additionally, a list of all
the parameter dicts is stored at ``results_['params']``.
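
For illustration, here is a minimal sketch combining the three points above
(a hedged example written against the API described in this section, not code
from the PR; the dataset, estimator and parameter grid are arbitrary choices):

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.svm import SVC

iris = load_iris()
X, y = iris.data, iris.target

# Splitters hold no data; the data goes to split(), which yields the folds.
cv = KFold()
for train_index, test_index in cv.split(X, y):
    pass  # train/test indices for each split

# Nested cross-validation: an inner parameter search scored by an outer loop.
gs = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=KFold())
nested_scores = cross_val_score(gs, X, y, cv=KFold())

# results_ is a dict of 1D arrays and imports directly into pandas.
gs.fit(X, y)
df = pd.DataFrame(gs.results_)   # one row per candidate
print(df[['test_mean_score', 'test_std_score', 'test_rank_score']])
print(gs.results_['params'][0])  # parameter dict of the first candidate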


New features
............

@@ -54,7 +105,7 @@ New features
- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
Elkan's fast K-Means algorithm. By `Andreas Müller`_.

- Generalization of :func:`model_selection._validation.cross_val_predict`.
- Generalization of :func:`model_selection.cross_val_predict`.
One can pass method names such as `predict_proba` to be used in the cross
validation framework instead of the default `predict`. By `Ori Ziv`_ and `Sears Merritt`_.

@@ -66,11 +117,10 @@ Enhancements
and `Devashish Deshpande`_.

- The cross-validation iterators are replaced by cross-validation splitters
available from :mod:`model_selection`. These expose a ``split`` method
that takes in the data and yields a generator for the different splits.
This change makes it possible to do nested cross-validation with ease,
facilitated by :class:`model_selection.GridSearchCV` and similar
utilities. (`#4294 <https://github.com/scikit-learn/scikit-learn/pull/4294>`_) by `Raghav R V`_.
available from :mod:`sklearn.model_selection`.
See :ref:`model_selection_changes` for more information.
(`#4294 <https://github.com/scikit-learn/scikit-learn/pull/4294>`_) by
`Raghav R V`_.

- The random forest, extra trees and decision tree estimators now have a
method ``decision_path`` which returns the decision path of samples in
@@ -144,6 +194,14 @@ Enhancements
- The :func:`ignore_warnings` now accepts a category argument to ignore only
the warnings of a specified type. By `Thierry Guillemot`_.

- The new ``results_`` attribute of :class:`model_selection.GridSearchCV`
(and :class:`model_selection.RandomizedSearchCV`) can be easily imported
into pandas as a ``DataFrame``. See :ref:`model_selection_changes` for
more information.
(`#6697 <https://github.com/scikit-learn/scikit-learn/pull/6697>`_) by
`Raghav R V`_.

Member: Adding the PR # remains unconventional

Member Author: By PR # do you mean adding the PR number or the hash?
If by PR # you mean PR number, it seems to be the new convention as other
entries (newer ones) seem to have it. I vaguely remember @amueller having
an opinion about it.

Member: Okay, I don't mind; just seems like a lot of bookkeeping for
whoever's doing the release if it's to be perfected.


Bug fixes
.........

@@ -212,10 +270,12 @@ Bug fixes
API changes summary
-------------------

- The :mod:`cross_validation`, :mod:`grid_search` and :mod:`learning_curve`
have been deprecated and the classes and functions have been reorganized into
the :mod:`model_selection` module.
(`#4294 <https://github.com/scikit-learn/scikit-learn/pull/4294>`_) by `Raghav R V`_.
- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
:mod:`sklearn.learning_curve` modules have been deprecated and the classes
and functions have been reorganized into the :mod:`model_selection` module.
See :ref:`model_selection_changes` for more information.
(`#4294 <https://github.com/scikit-learn/scikit-learn/pull/4294>`_) by
`Raghav R V`_.

- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
Use ``loss`` instead. By `Manoj Kumar`_.
@@ -224,12 +284,20 @@ API changes summary
:class:`isotonic.IsotonicRegression`. By `Jonathan Arfa`_.

- The old :class:`GMM` is deprecated in favor of the new
:class:`GaussianMixture`. The new class compute the Gaussian mixture
faster than before and some of computationnal problems have been solved.
:class:`GaussianMixture`. The new class computes the Gaussian mixture
faster than before and some of computational problems have been solved.
By `Wei Xue`_ and `Thierry Guillemot`_.

- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
the attribute ``results_``.
See :ref:`model_selection_changes` for more information.
(`#6697 <https://github.com/scikit-learn/scikit-learn/pull/6697>`_) by
`Raghav R V`_.
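
For illustration, a hedged before/after sketch of the migration this entry
implies (``search`` is any fitted ``GridSearchCV`` or ``RandomizedSearchCV``;
key names follow the description in the section above):

# before (deprecated): grid_scores_ is a list of
# (parameters, mean score, per-split scores) tuples
for params, mean_score, cv_scores in search.grid_scores_:
    print(params, mean_score, cv_scores.std())

# after: results_ is a dict of parallel 1D arrays, one entry per candidate
for params, mean, std in zip(search.results_['params'],
                             search.results_['test_mean_score'],
                             search.results_['test_std_score']):
    print(params, mean, std)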

Member Author: Does this look okay now? @jnothman @MechCoder

Member: I had thought you'd keep stuff under enhancements and add a note
under API changes. I also think this is more detail than necessary in a
what's new entry, though I know @MechCoder asked for more. However, I think
the new model selection changes really need a section of their own in the
0.18 what's new, as it touches a lot of people's code; this level of detail
should be given in there, preferably using or referring to an example.

Member Author: > However, I think the new model selection changes really
need a section of their own in the 0.18 what's new, as it touches a lot of
people's code;

Do you mean moving all the entries to that section, or keeping the entries
in the respective sections brief and referring to the model selection
changes section, where we explain it in more detail?

Member: The latter.


.. currentmodule:: sklearn

.. _changes_0_17_1:

Version 0.17.1
@@ -4088,7 +4156,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.

.. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me

.. _Raghav R V: https://github.com/rvraghav93
.. _Raghav R V: https://github.com/raghavrv

.. _Trevor Stephens: http://trevorstephens.com/

6 changes: 4 additions & 2 deletions examples/model_selection/grid_search_digits.py
@@ -60,9 +60,11 @@
print()
print("Grid scores on development set:")
print()
for params, mean_score, scores in clf.grid_scores_:
means = clf.results_['test_mean_score']
stds = clf.results_['test_std_score']
for i in range(len(clf.results_['params'])):
Member: why not zip?

Member Author (@raghavrv, Jun 16, 2016): I refactored this from the now
removed _get_candidate_scores. zip should have indeed been a better choice!

Member: Because the reviewers got lazy towards the end :)

print("%0.3f (+/-%0.03f) for %r"
% (mean_score, scores.std() * 2, params))
% (means[i], stds[i] * 2, clf.results_['params'][i]))
print()
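
As the review thread above notes, the loop could also be written with zip; a
minimal sketch of that variant (an illustration, not part of the merged diff):

means = clf.results_['test_mean_score']
stds = clf.results_['test_std_score']
for mean, std, params in zip(means, stds, clf.results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print()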

print("Detailed classification report:")
26 changes: 13 additions & 13 deletions examples/model_selection/randomized_search.py
@@ -23,7 +23,6 @@
import numpy as np

from time import time
from operator import itemgetter
from scipy.stats import randint as sp_randint

from sklearn.model_selection import GridSearchCV
@@ -40,15 +39,16 @@


# Utility function to report best scores
def report(grid_scores, n_top=3):
    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
    for i, score in enumerate(top_scores):
        print("Model with rank: {0}".format(i + 1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
              score.mean_validation_score,
              np.std(score.cv_validation_scores)))
        print("Parameters: {0}".format(score.parameters))
        print("")
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['test_rank_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['test_mean_score'][candidate],
                  results['test_std_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")


# specify parameters and distributions to sample from
@@ -68,7 +68,7 @@ def report(grid_scores, n_top=3):
random_search.fit(X, y)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
" parameter settings." % ((time() - start), n_iter_search))
report(random_search.grid_scores_)
report(random_search.results_)

# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
Expand All @@ -84,5 +84,5 @@ def report(grid_scores, n_top=3):
grid_search.fit(X, y)

print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
% (time() - start, len(grid_search.grid_scores_)))
report(grid_search.grid_scores_)
% (time() - start, len(grid_search.results_['params'])))
report(grid_search.results_)
7 changes: 2 additions & 5 deletions examples/svm/plot_rbf_parameters.py
@@ -171,11 +171,8 @@ def __call__(self, value, clip=None):
plt.yticks(())
plt.axis('tight')

# plot the scores of the grid
# grid_scores_ contains parameter settings and scores
# We extract just the scores
scores = [x[1] for x in grid.grid_scores_]
scores = np.array(scores).reshape(len(C_range), len(gamma_range))
scores = grid.results_['test_mean_score'].reshape(len(C_range),
                                                  len(gamma_range))

# Draw heatmap of the validation accuracy as a function of gamma and C
#
2 changes: 1 addition & 1 deletion examples/svm/plot_svm_scale_c.py
@@ -131,7 +131,7 @@
cv=ShuffleSplit(train_size=train_size, n_iter=250,
random_state=1))
grid.fit(X, y)
scores = [x[1] for x in grid.grid_scores_]
scores = grid.results_['test_mean_score']

scales = [(1, 'No scaling'),
((n_samples * train_size), '1/n_samples'),