DOC Fixed doc issues for utils module (#11445)
NicolasHug authored and jnothman committed Jul 22, 2018
1 parent cc65bef commit 6d15840
Showing 10 changed files with 270 additions and 18 deletions.
1 change: 0 additions & 1 deletion sklearn/tests/test_docstring_parameters.py
@@ -31,7 +31,6 @@
'model_selection',
'multioutput',
'setup',
'utils',
# Deprecated modules
'cross_validation',
'grid_search',
44 changes: 42 additions & 2 deletions sklearn/utils/__init__.py
@@ -120,6 +120,21 @@ def axis0_safe_slice(X, mask, len_mask):
is not going to be the bottleneck, since the number of outliers
and non_outliers are typically non-zero and it makes the code
tougher to follow.
Parameters
----------
X : {array-like, sparse matrix}
Data on which to apply mask.
mask : array
Mask to be used on X.
len_mask : int
The length of the mask.
Returns
-------
array
    The rows of X for which ``mask`` is True; empty when ``len_mask`` is 0.
"""
if len_mask != 0:
return X[safe_mask(X, mask), :]
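A minimal usage sketch of axis0_safe_slice as documented above (a hedged sketch, assuming the import path of this era of scikit-learn):

import numpy as np
from sklearn.utils import axis0_safe_slice

X = np.arange(12).reshape(4, 3)
mask = np.array([True, False, True, False])
subset = axis0_safe_slice(X, mask, mask.sum())  # rows 0 and 2 of X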
@@ -183,6 +198,8 @@ def resample(*arrays, **options):
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
Other Parameters
----------------
replace : boolean, True by default
Implements resampling with replacement. If False, this will implement
(sliced) random permutations.
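A short sketch of the documented ``replace`` behaviour, per the signature shown in this hunk:

import numpy as np
from sklearn.utils import resample

X = np.arange(10).reshape(5, 2)
y = np.arange(5)
# With replace=True this is a bootstrap; the rows of X and y stay aligned.
X_boot, y_boot = resample(X, y, replace=True, n_samples=5, random_state=0)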
@@ -293,6 +310,8 @@ def shuffle(*arrays, **options):
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
Other Parameters
----------------
random_state : int, RandomState instance or None, optional (default=None)
The seed of the pseudo random number generator to use when shuffling
the data. If int, random_state is the seed used by the random number
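A sketch of ``shuffle`` with a fixed ``random_state``, matching the docstring above:

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)
y = np.arange(5)
# The same permutation is applied to every array; the seed makes it repeatable.
X_shuf, y_shuf = shuffle(X, y, random_state=0)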
@@ -384,6 +403,16 @@ def gen_batches(n, batch_size):
The last slice may contain fewer than batch_size elements, when batch_size
does not divide n.
Parameters
----------
n : int
batch_size : int
Number of elements in each batch
Yields
------
slice of batch_size elements
Examples
--------
>>> from sklearn.utils import gen_batches
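A minimal runnable sketch of gen_batches, consistent with the docstring above:

from sklearn.utils import gen_batches

# Three slices covering range(7); the last batch holds the 1-element remainder.
batches = list(gen_batches(7, 3))  # [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]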
@@ -406,8 +435,19 @@ def gen_batches(n, batch_size):
def gen_even_slices(n, n_packs, n_samples=None):
"""Generator to create n_packs slices going up to n.
Pass n_samples when the slices are to be used for sparse matrix indexing;
slicing off-the-end raises an exception, while it works for NumPy arrays.
Parameters
----------
n : int
n_packs : int
Number of slices to generate.
n_samples : int or None (default = None)
Number of samples. Pass n_samples when the slices are to be used for
sparse matrix indexing; slicing off-the-end raises an exception, while
it works for NumPy arrays.
Yields
------
slice
Examples
--------
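A hedged sketch of gen_even_slices, matching the parameters documented above:

from sklearn.utils import gen_even_slices

# Ten items in three near-even packs; the earliest packs absorb the remainder.
packs = list(gen_even_slices(10, 3))  # [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]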
9 changes: 9 additions & 0 deletions sklearn/utils/bench.py
@@ -10,6 +10,15 @@ def total_seconds(delta):
http://docs.python.org/library/datetime.html\
#datetime.timedelta.total_seconds
Parameters
----------
delta : datetime object
Returns
-------
float
    The number of seconds contained in delta
"""

mu_sec = 1e-6  # number of seconds in one microsecond
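A quick sketch of total_seconds (assuming the helper remains importable from sklearn.utils.bench, as in this diff):

from datetime import timedelta
from sklearn.utils.bench import total_seconds

delta = timedelta(days=1, seconds=30, microseconds=500000)
total_seconds(delta)  # 86430.5: days, seconds and microseconds combined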
17 changes: 16 additions & 1 deletion sklearn/utils/deprecation.py
@@ -126,12 +126,27 @@ def __getitem__(self, key):
return super(DeprecationDict, self).__getitem__(key)

def get(self, key, default=None):
"""Return the value corresponding to key, else default.
Parameters
----------
key : any hashable object
The key
default : object, optional
The default returned when key is not in dict
"""
# dict does not implement it like this, hence it needs to be overridden
try:
return self[key]
except KeyError:
return default

def add_warning(self, key, *args, **kwargs):
"""Add a warning to be triggered when the specified key is read"""
"""Add a warning to be triggered when the specified key is read
Parameters
----------
key : any hashable object
The key
"""
self._deprecations[key] = (args, kwargs)
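A sketch tying get and add_warning together (a hedged example; the (args, kwargs) storage above suggests warnings.warn-style arguments):

from sklearn.utils.deprecation import DeprecationDict

d = DeprecationDict()
d['old_key'] = 42
d.add_warning('old_key', 'old_key is deprecated', DeprecationWarning)
value = d.get('old_key')  # returns 42 and emits the registered warning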
8 changes: 7 additions & 1 deletion sklearn/utils/estimator_checks.py
@@ -382,7 +382,13 @@ def set_checking_parameters(estimator):


class NotAnArray(object):
" An object that is convertable to an array"
"""An object that is convertible to an array
Parameters
----------
data : array_like
The data.
"""

def __init__(self, data):
self.data = data
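A sketch of what NotAnArray is for in the estimator checks (assuming the class exposes __array__, as it does in scikit-learn's estimator_checks):

import numpy as np
from sklearn.utils.estimator_checks import NotAnArray

wrapped = NotAnArray(np.arange(5))
np.asarray(wrapped)  # succeeds because conversion goes through __array__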
76 changes: 71 additions & 5 deletions sklearn/utils/extmath.py
@@ -40,8 +40,17 @@ def norm(x):
def squared_norm(x):
"""Squared Euclidean or Frobenius norm of x.
Returns the Euclidean norm when x is a vector, the Frobenius norm when x
is a matrix (2-d array). Faster than norm(x) ** 2.
Faster than norm(x) ** 2.
Parameters
----------
x : array_like
Returns
-------
float
The Euclidean norm when x is a vector, the Frobenius norm when x
is a matrix (2-d array).
"""
x = np.ravel(x, order='K')
if np.issubdtype(x.dtype, np.integer):
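A one-liner sketch of squared_norm on a vector:

import numpy as np
from sklearn.utils.extmath import squared_norm

v = np.array([3.0, 4.0])
squared_norm(v)  # 25.0, the squared Euclidean norm, with no square root taken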
@@ -58,6 +67,18 @@ def row_norms(X, squared=False):
matrices and does not create an X.shape-sized temporary.
Performs no input validation.
Parameters
----------
X : array_like
The input array
squared : bool, optional (default = False)
If True, return squared norms.
Returns
-------
array_like
The row-wise (squared) Euclidean norm of X.
"""
if sparse.issparse(X):
if not isinstance(X, sparse.csr_matrix):
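A sketch of row_norms on a dense array (it also accepts CSR matrices, per the docstring):

import numpy as np
from sklearn.utils.extmath import row_norms

X = np.array([[3.0, 4.0], [6.0, 8.0]])
row_norms(X)                # array([ 5., 10.])
row_norms(X, squared=True)  # array([ 25., 100.])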
@@ -76,6 +97,11 @@ def fast_logdet(A):
Equivalent to: np.log(nl.det(A)) but more robust.
It returns -Inf if det(A) is non-positive or not defined.
Parameters
----------
A : array_like
The matrix
"""
sign, ld = np.linalg.slogdet(A)
if not sign > 0:
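A sketch of fast_logdet on a positive-definite matrix:

import numpy as np
from sklearn.utils.extmath import fast_logdet

A = np.diag([2.0, 3.0])
fast_logdet(A)  # log(det(A)) = log(6), about 1.79; -inf when det(A) <= 0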
@@ -102,7 +128,15 @@ def fast_dot(a, b, out=None):
def density(w, **kwargs):
"""Compute density of a sparse vector
Return a value between 0 and 1
Parameters
----------
w : array_like
The sparse vector
Returns
-------
float
The density of w, between 0 and 1
"""
if hasattr(w, "toarray"):
d = float(w.nnz) / (w.shape[0] * w.shape[1])
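A sketch of density on a CSR input, exercising the toarray branch shown above:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import density

w = sparse.csr_matrix(np.array([[0.0, 1.0, 0.0, 2.0]]))
density(w)  # 0.5: two stored non-zeros out of four entries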
@@ -509,7 +543,12 @@ def svd_flip(u, v, u_based_decision=True):
Parameters
----------
u, v : ndarray
u : ndarray
u and v are the output of `linalg.svd` or
`sklearn.utils.extmath.randomized_svd`, with matching inner dimensions
so one can compute `np.dot(u * s, v)`.
v : ndarray
u and v are the output of `linalg.svd` or
`sklearn.utils.extmath.randomized_svd`, with matching inner dimensions
so one can compute `np.dot(u * s, v)`.
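A sketch of svd_flip applied to numpy's SVD output, per the parameter description above:

import numpy as np
from sklearn.utils.extmath import svd_flip

rng = np.random.RandomState(0)
A = rng.randn(4, 3)
u, s, v = np.linalg.svd(A, full_matrices=False)
u, v = svd_flip(u, v)  # signs are now deterministic; np.dot(u * s, v) still reconstructs A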
@@ -624,6 +663,15 @@ def safe_min(X):
Adapted from http://stackoverflow.com/q/13426580
Parameters
----------
X : array_like
The input array or sparse matrix
Returns
-------
float
    The min value of X
"""
if sparse.issparse(X):
if len(X.data) == 0:
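A sketch of safe_min on sparse input, where implicit zeros matter:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import safe_min

X = sparse.csr_matrix(np.array([[0, 1], [2, 3]]))
safe_min(X)  # 0: the stored minimum is 1, but the matrix is not fully dense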
@@ -635,7 +683,25 @@ def safe_min(X):


def make_nonnegative(X, min_value=0):
"""Ensure `X.min()` >= `min_value`."""
"""Ensure `X.min()` >= `min_value`.
Parameters
----------
X : array_like
The matrix to make non-negative
min_value : float
The threshold value
Returns
-------
array_like
The thresholded array
Raises
------
ValueError
    When X is sparse and its minimum is below ``min_value``
"""
min_ = safe_min(X)
if min_ < min_value:
if sparse.issparse(X):
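A sketch of make_nonnegative on a dense array (sparse input raises, as documented above):

import numpy as np
from sklearn.utils.extmath import make_nonnegative

X = np.array([[-2.0, 1.0], [0.0, 3.0]])
make_nonnegative(X)  # every entry shifted by +2 so the minimum becomes 0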
11 changes: 10 additions & 1 deletion sklearn/utils/fixes.py
@@ -199,7 +199,16 @@ def _argmax(arr_or_matrix, axis=None):


def parallel_helper(obj, methodname, *args, **kwargs):
"""Workaround for Python 2 limitations of pickling instance methods"""
"""Workaround for Python 2 limitations of pickling instance methods
Parameters
----------
obj : object
    The instance whose method is called.
methodname : str
    The name of the method to call on ``obj``.
*args : positional arguments
    Passed on to the method.
**kwargs : keyword arguments
    Passed on to the method.
"""
return getattr(obj, methodname)(*args, **kwargs)
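A sketch of parallel_helper with the vendored joblib of this era (the sklearn.externals.joblib path is an assumption tied to 2018-vintage scikit-learn):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed
from sklearn.utils.fixes import parallel_helper

arrays = [np.arange(3), np.arange(4)]
# Call the bound method `sum` of each object; the helper keeps it picklable on Python 2.
sums = Parallel(n_jobs=2)(delayed(parallel_helper)(a, 'sum') for a in arrays)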


52 changes: 50 additions & 2 deletions sklearn/utils/mocking.py
@@ -6,6 +6,11 @@


class ArraySlicingWrapper(object):
"""
Parameters
----------
array : array_like
    The array to wrap and slice.
"""
def __init__(self, array):
self.array = array

@@ -14,8 +19,12 @@ def __getitem__(self, aslice):


class MockDataFrame(object):

# have shape an length but don't support indexing.
"""
Parameters
----------
array : array_like
    The underlying array, also exposed as ``values``.
"""
# have shape and length but don't support indexing.
def __init__(self, array):
self.array = array
self.values = array
@@ -46,6 +55,13 @@ class CheckingClassifier(BaseEstimator, ClassifierMixin):
Checks some property of X and y in fit / predict.
This allows testing whether pipelines / cross-validation or metaestimators
changed the input.
Parameters
----------
check_y : callable, optional (default=None)
    Checking callable applied to y in ``fit``; must return a truthy value.
check_X : callable, optional (default=None)
    Checking callable applied to X in ``fit`` and ``predict``.
foo_param : int, optional (default=0)
    Dummy hyper-parameter; ``score`` returns 1. when it is greater than 1.
expected_fit_params : list of str, optional (default=None)
    Names of the fit parameters that must be passed to ``fit``.
"""
def __init__(self, check_y=None, check_X=None, foo_param=0,
expected_fit_params=None):
@@ -55,6 +71,22 @@ def __init__(self, check_y=None, check_X=None, foo_param=0,
self.expected_fit_params = expected_fit_params

def fit(self, X, y, **fit_params):
"""
Fit classifier
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples] or [n_samples, n_output], optional
Target relative to X for classification or regression;
None for unsupervised learning.
**fit_params : dict of string -> object
Parameters passed to the ``fit`` method of the estimator
"""
assert_true(len(X) == len(y))
if self.check_X is not None:
assert_true(self.check_X(X))
@@ -74,11 +106,27 @@ def fit(self, X, y, **fit_params):
return self

def predict(self, T):
"""
Parameters
----------
T : indexable, length n_samples
"""
if self.check_X is not None:
assert_true(self.check_X(T))
return self.classes_[np.zeros(_num_samples(T), dtype=np.int)]

def score(self, X=None, Y=None):
"""
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Input data, where n_samples is the number of samples and
n_features is the number of features.
Y : array-like, shape = [n_samples] or [n_samples, n_output], optional
Target relative to X for classification or regression;
None for unsupervised learning.
"""
if self.foo_param > 1:
score = 1.
else:
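A sketch of CheckingClassifier exercising check_X at both fit and predict time:

import numpy as np
from sklearn.utils.mocking import CheckingClassifier

X = np.ones((10, 2))
y = np.zeros(10)
clf = CheckingClassifier(check_X=lambda X: X.shape[1] == 2)
clf.fit(X, y).predict(X)  # check_X must return something truthy or the assert fires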