[MRG+1] ENH/MNT Rename labels --> groups #6660

Merged 5 commits on Sep 11, 2016
Changes from 3 commits
327 changes: 186 additions & 141 deletions doc/modules/cross_validation.rst

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions doc/tutorial/statistical_inference/model_selection.rst
@@ -110,7 +110,7 @@ scoring method.

- :class:`StratifiedKFold` **(n_iter, test_size, train_size, random_state)**

-- :class:`LabelKFold` **(n_splits, shuffle, random_state)**
+- :class:`GroupKFold` **(n_splits, shuffle, random_state)**


*
@@ -119,7 +119,7 @@ scoring method.

- Same as K-Fold but preserves the class distribution within each fold.

-- Ensures that the same label is not in both testing and training sets.
+- Ensures that the same group is not in both testing and training sets.


.. list-table::
@@ -130,34 +130,34 @@ scoring method.

- :class:`StratifiedShuffleSplit`

-- :class:`LabelShuffleSplit`
+- :class:`GroupShuffleSplit`

*

- Generates train/test indices based on random permutation.

- Same as shuffle split but preserves the class distribution within each iteration.

-- Ensures that the same label is not in both testing and training sets.
+- Ensures that the same group is not in both testing and training sets.


.. list-table::

*

-- :class:`LeaveOneLabelOut` **()**
+- :class:`LeaveOneGroupOut` **()**

-- :class:`LeavePLabelOut` **(p)**
+- :class:`LeavePGroupsOut` **(p)**

- :class:`LeaveOneOut` **()**



*

-- Takes a label array to group observations.
+- Takes a group array to group observations.

-- Leave P labels out.
+- Leave P groups out.

- Leave one observation out.

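As a quick illustration of the comparison above, a minimal sketch of the renamed group-aware splitter (toy data invented for the example; assumes scikit-learn >= 0.18, where this PR landed):

```python
import numpy as np
from sklearn.model_selection import GroupKFold

# Toy data: four samples drawn from two groups.
X = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([0, 0, 1, 1])
groups = np.array([0, 0, 1, 1])

# GroupKFold (formerly LabelKFold) keeps each group entirely on
# one side of every train/test split.
for train_idx, test_idx in GroupKFold(n_splits=2).split(X, y, groups=groups):
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])
```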
57 changes: 48 additions & 9 deletions doc/whats_new.rst
@@ -64,16 +64,41 @@ Model Selection Enhancements and API Changes
- **Parameters ``n_folds`` and ``n_iter`` renamed to ``n_splits``**

Some parameter names have changed:
-The ``n_folds`` parameter in :class:`model_selection.KFold`,
-:class:`model_selection.LabelKFold`, and
-:class:`model_selection.StratifiedKFold` is now renamed to ``n_splits``.
-The ``n_iter`` parameter in :class:`model_selection.ShuffleSplit`,
-:class:`model_selection.LabelShuffleSplit`,
-and :class:`model_selection.StratifiedShuffleSplit` is now renamed
-to ``n_splits``.
+The ``n_folds`` parameter in :class:`model_selection.KFold`, the new
+:class:`model_selection.GroupKFold` (see below for the name change),
+and :class:`model_selection.StratifiedKFold` is now renamed to
+``n_splits``. The ``n_iter`` parameter in
+:class:`model_selection.ShuffleSplit`, the new
+:class:`model_selection.GroupShuffleSplit`, and
+:class:`model_selection.StratifiedShuffleSplit` is now renamed to
+``n_splits``.
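For illustration (not part of the diff), the unified parameter name can be checked as follows, assuming scikit-learn >= 0.18:

```python
from sklearn.model_selection import KFold, ShuffleSplit

# Both splitter families now take n_splits
# (formerly n_folds for KFold and n_iter for ShuffleSplit).
kf = KFold(n_splits=3)
ss = ShuffleSplit(n_splits=3, test_size=0.25, random_state=0)
assert kf.get_n_splits() == 3
assert ss.get_n_splits() == 3
```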

- **Rename of splitter classes that accept group labels along with data**

The cross-validation splitters ``LabelKFold``,
``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
been renamed to :class:`model_selection.GroupKFold`,
:class:`model_selection.GroupShuffleSplit`,
:class:`model_selection.LeaveOneGroupOut` and
:class:`model_selection.LeavePGroupsOut` respectively.

NOTE the change from singular to plural form in
:class:`model_selection.LeavePGroupsOut`.
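A short sketch of one of the renamed classes in use (toy data invented for the example; assumes scikit-learn >= 0.18):

```python
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut

# Six samples spread over three groups.
X = np.arange(6).reshape(-1, 1)
y = np.array([0, 1, 0, 1, 0, 1])
groups = np.array([1, 1, 2, 2, 3, 3])

logo = LeaveOneGroupOut()  # formerly LeaveOneLabelOut
# One split per distinct group: each group is held out exactly once.
assert logo.get_n_splits(X, y, groups) == 3
```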

- **Fit parameter ``labels`` renamed to ``groups``**

The ``labels`` parameter in the :func:`split` method of the newly renamed
splitters :class:`model_selection.GroupKFold`,
:class:`model_selection.LeaveOneGroupOut`,
:class:`model_selection.LeavePGroupsOut`, and
:class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
following the new nomenclature of their class names.
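For example (a sketch with invented toy data, assuming scikit-learn >= 0.18), the renamed keyword is passed to split() as follows:

```python
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

# Eight samples spread over four groups.
X = np.arange(8).reshape(-1, 1)
y = np.zeros(8, dtype=int)
groups = np.array([1, 1, 2, 2, 3, 3, 4, 4])

gss = GroupShuffleSplit(n_splits=4, test_size=0.5, random_state=0)
# The grouping argument of split() is now called ``groups`` (was ``labels``).
for train_idx, test_idx in gss.split(X, y, groups=groups):
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])
```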

- **Parameter ``n_labels`` renamed to ``n_groups``**

The parameter ``n_labels`` in the newly renamed
:class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
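A minimal sketch of the renamed constructor parameter (toy data invented for the example; assumes scikit-learn >= 0.18):

```python
import numpy as np
from sklearn.model_selection import LeavePGroupsOut

X = np.arange(6).reshape(-1, 1)
y = np.array([0, 1, 0, 1, 0, 1])
groups = np.array([1, 1, 2, 2, 3, 3])

# ``n_groups`` was previously called ``n_labels``.
lpgo = LeavePGroupsOut(n_groups=2)
# Leaving 2 of 3 groups out yields C(3, 2) = 3 splits.
assert lpgo.get_n_splits(X, y, groups) == 3
```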

Changelog
---------

New features
............
@@ -449,6 +474,20 @@ API changes summary
:func:`metrics.classification.hamming_loss`.
(`#7260 <https://github.com/scikit-learn/scikit-learn/pull/7260>`_) by
`Sebastián Vanrell`_.

- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
``LeaveOneLabelOut`` and ``LeavePLabelOut`` are renamed to
:class:`model_selection.GroupKFold`,
:class:`model_selection.GroupShuffleSplit`,
:class:`model_selection.LeaveOneGroupOut`
and :class:`model_selection.LeavePGroupsOut` respectively.
Also, the parameter ``labels`` in the :func:`split` method of the newly
renamed splitters :class:`model_selection.LeaveOneGroupOut` and
:class:`model_selection.LeavePGroupsOut` is renamed to
``groups``. Additionally, in :class:`model_selection.LeavePGroupsOut`,
the parameter ``n_labels`` is renamed to ``n_groups``.
(`#6660 <https://github.com/scikit-learn/scikit-learn/pull/6660>`_)
by `Raghav RV`_.


.. currentmodule:: sklearn
16 changes: 8 additions & 8 deletions sklearn/model_selection/__init__.py
@@ -1,14 +1,14 @@
from ._split import BaseCrossValidator
from ._split import KFold
-from ._split import LabelKFold
+from ._split import GroupKFold
from ._split import StratifiedKFold
from ._split import TimeSeriesSplit
-from ._split import LeaveOneLabelOut
+from ._split import LeaveOneGroupOut
from ._split import LeaveOneOut
-from ._split import LeavePLabelOut
+from ._split import LeavePGroupsOut
from ._split import LeavePOut
from ._split import ShuffleSplit
-from ._split import LabelShuffleSplit
+from ._split import GroupShuffleSplit
from ._split import StratifiedShuffleSplit
from ._split import PredefinedSplit
from ._split import train_test_split
@@ -30,11 +30,11 @@
'GridSearchCV',
'TimeSeriesSplit',
'KFold',
-'LabelKFold',
-'LabelShuffleSplit',
-'LeaveOneLabelOut',
+'GroupKFold',
+'GroupShuffleSplit',
+'LeaveOneGroupOut',
'LeaveOneOut',
-'LeavePLabelOut',
+'LeavePGroupsOut',
'LeavePOut',
'ParameterGrid',
'ParameterSampler',
20 changes: 10 additions & 10 deletions sklearn/model_selection/_search.py
@@ -528,15 +528,15 @@ def inverse_transform(self, Xt):
self._check_is_fitted('inverse_transform')
return self.best_estimator_.transform(Xt)

-def _fit(self, X, y, labels, parameter_iterable):
+def _fit(self, X, y, groups, parameter_iterable):
"""Actual fitting, performing the search over parameters."""

estimator = self.estimator
cv = check_cv(self.cv, y, classifier=is_classifier(estimator))
self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

-X, y, labels = indexable(X, y, labels)
-n_splits = cv.get_n_splits(X, y, labels)
+X, y, groups = indexable(X, y, groups)
+n_splits = cv.get_n_splits(X, y, groups)
if self.verbose > 0 and isinstance(parameter_iterable, Sized):
n_candidates = len(parameter_iterable)
print("Fitting {0} folds for each of {1} candidates, totalling"
@@ -554,7 +554,7 @@ def _fit(self, X, y, labels, parameter_iterable):
self.fit_params, return_parameters=True,
error_score=self.error_score)
for parameters in parameter_iterable
-for train, test in cv.split(X, y, labels))
+for train, test in cv.split(X, y, groups))

test_scores, test_sample_counts, _, parameters = zip(*out)

@@ -876,7 +876,7 @@ def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
self.param_grid = param_grid
_check_param_grid(param_grid)

-def fit(self, X, y=None, labels=None):
+def fit(self, X, y=None, groups=None):
"""Run fit with all sets of parameters.

Parameters
@@ -890,11 +890,11 @@ def fit(self, X, y=None, labels=None):
Target relative to X for classification or regression;
None for unsupervised learning.

-labels : array-like, with shape (n_samples,), optional
+groups : array-like, with shape (n_samples,), optional
Group labels for the samples used while splitting the dataset into
train/test set.
"""
-return self._fit(X, y, labels, ParameterGrid(self.param_grid))
+return self._fit(X, y, groups, ParameterGrid(self.param_grid))


class RandomizedSearchCV(BaseSearchCV):
@@ -1104,7 +1104,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score)

-def fit(self, X, y=None, labels=None):
+def fit(self, X, y=None, groups=None):
"""Run fit on the estimator with randomly drawn parameters.

Parameters
@@ -1117,11 +1117,11 @@ def fit(self, X, y=None, labels=None):
Target relative to X for classification or regression;
None for unsupervised learning.

-labels : array-like, with shape (n_samples,), optional
+groups : array-like, with shape (n_samples,), optional
Group labels for the samples used while splitting the dataset into
train/test set.
"""
sampled_params = ParameterSampler(self.param_distributions,
self.n_iter,
random_state=self.random_state)
-return self._fit(X, y, labels, sampled_params)
+return self._fit(X, y, groups, sampled_params)
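Putting the renamed ``fit`` signature together, a hypothetical end-to-end usage might look like this (toy data and estimator choice are invented for the example; assumes scikit-learn >= 0.18):

```python
import numpy as np
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.svm import SVC

# Eight samples in four groups; each group contains both classes so
# every training fold sees both labels.
X = np.array([[0.0], [0.1], [1.0], [1.1], [2.0], [2.1], [3.0], [3.1]])
y = np.array([0, 1, 0, 1, 0, 1, 0, 1])
groups = np.array([1, 1, 2, 2, 3, 3, 4, 4])

# ``groups`` (formerly ``labels``) is forwarded to the splitter's split().
search = GridSearchCV(SVC(), {'C': [0.1, 1.0]}, cv=GroupKFold(n_splits=2))
search.fit(X, y, groups=groups)
assert hasattr(search, 'best_params_')
```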