DOC Fixed doc issues for utils module (#11445)
NicolasHug authored and jnothman committed Jul 22, 2018
1 parent cc65bef commit 6d15840
Showing 10 changed files with 270 additions and 18 deletions.
1 change: 0 additions & 1 deletion sklearn/tests/test_docstring_parameters.py
@@ -31,7 +31,6 @@
'model_selection',
'multioutput',
'setup',
'utils',
# Deprecated modules
'cross_validation',
'grid_search',
44 changes: 42 additions & 2 deletions sklearn/utils/__init__.py
@@ -120,6 +120,21 @@ def axis0_safe_slice(X, mask, len_mask):
is not going to be the bottleneck, since the number of outliers
and non_outliers are typically non-zero and it makes the code
tougher to follow.
Parameters
----------
X : {array-like, sparse matrix}
Data on which to apply mask.
mask : array
Mask to be used on X.
len_mask : int
The length of the mask.
Returns
-------
array
    The rows of X for which ``mask`` is True; empty when ``len_mask`` is 0.
"""
if len_mask != 0:
return X[safe_mask(X, mask), :]
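A minimal usage sketch of axis0_safe_slice as documented above (a hedged sketch, assuming the import path of this era of scikit-learn):

import numpy as np
from sklearn.utils import axis0_safe_slice

X = np.arange(12).reshape(4, 3)
mask = np.array([True, False, True, False])
subset = axis0_safe_slice(X, mask, mask.sum())  # rows 0 and 2 of X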
@@ -183,6 +198,8 @@ def resample(*arrays, **options):
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
Other Parameters
----------------
replace : boolean, True by default
Implements resampling with replacement. If False, this will implement
(sliced) random permutations.
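A short sketch of the documented ``replace`` behaviour, per the signature shown in this hunk:

import numpy as np
from sklearn.utils import resample

X = np.arange(10).reshape(5, 2)
y = np.arange(5)
# With replace=True this is a bootstrap; the rows of X and y stay aligned.
X_boot, y_boot = resample(X, y, replace=True, n_samples=5, random_state=0)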
@@ -293,6 +310,8 @@ def shuffle(*arrays, **options):
Indexable data-structures can be arrays, lists, dataframes or scipy
sparse matrices with consistent first dimension.
Other Parameters
----------------
random_state : int, RandomState instance or None, optional (default=None)
The seed of the pseudo random number generator to use when shuffling
the data. If int, random_state is the seed used by the random number
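A sketch of ``shuffle`` with a fixed ``random_state``, matching the docstring above:

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)
y = np.arange(5)
# The same permutation is applied to every array; the seed makes it repeatable.
X_shuf, y_shuf = shuffle(X, y, random_state=0)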
@@ -384,6 +403,16 @@ def gen_batches(n, batch_size):
The last slice may contain fewer than batch_size elements, when batch_size
does not divide n.
Parameters
----------
n : int
batch_size : int
Number of elements in each batch
Yields
------
slice of batch_size elements
Examples
--------
>>> from sklearn.utils import gen_batches
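A minimal runnable sketch of gen_batches, consistent with the docstring above:

from sklearn.utils import gen_batches

# Three slices covering range(7); the last batch holds the 1-element remainder.
batches = list(gen_batches(7, 3))  # [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]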
@@ -406,8 +435,19 @@ def gen_batches(n, batch_size):
def gen_even_slices(n, n_packs, n_samples=None):
"""Generator to create n_packs slices going up to n.
Pass n_samples when the slices are to be used for sparse matrix indexing;
slicing off-the-end raises an exception, while it works for NumPy arrays.
Parameters
----------
n : int
n_packs : int
Number of slices to generate.
n_samples : int or None (default = None)
Number of samples. Pass n_samples when the slices are to be used for
sparse matrix indexing; slicing off-the-end raises an exception, while
it works for NumPy arrays.
Yields
------
slice
Examples
--------
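A hedged sketch of gen_even_slices, matching the parameters documented above:

from sklearn.utils import gen_even_slices

# Ten items in three near-even packs; the earliest packs absorb the remainder.
packs = list(gen_even_slices(10, 3))  # [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]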
9 changes: 9 additions & 0 deletions sklearn/utils/bench.py
@@ -10,6 +10,15 @@ def total_seconds(delta):
http://docs.python.org/library/datetime.html\
#datetime.timedelta.total_seconds
Parameters
----------
delta : datetime object
Returns
-------
float
    The number of seconds contained in delta
"""

mu_sec = 1e-6  # number of seconds in one microsecond
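A quick sketch of total_seconds (assuming the helper remains importable from sklearn.utils.bench, as in this diff):

from datetime import timedelta
from sklearn.utils.bench import total_seconds

delta = timedelta(days=1, seconds=30, microseconds=500000)
total_seconds(delta)  # 86430.5: days, seconds and microseconds combined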
17 changes: 16 additions & 1 deletion sklearn/utils/deprecation.py
@@ -126,12 +126,27 @@ def __getitem__(self, key):
return super(DeprecationDict, self).__getitem__(key)

def get(self, key, default=None):
"""Return the value corresponding to key, else default.
Parameters
----------
key : any hashable object
The key
default : object, optional
The default returned when key is not in dict
"""
# dict does not implement it like this, hence it needs to be overridden
try:
return self[key]
except KeyError:
return default

def add_warning(self, key, *args, **kwargs):
"""Add a warning to be triggered when the specified key is read"""
"""Add a warning to be triggered when the specified key is read
Parameters
----------
key : any hashable object
The key
"""
self._deprecations[key] = (args, kwargs)
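A sketch tying get and add_warning together (a hedged example; the (args, kwargs) storage above suggests warnings.warn-style arguments):

from sklearn.utils.deprecation import DeprecationDict

d = DeprecationDict()
d['old_key'] = 42
d.add_warning('old_key', 'old_key is deprecated', DeprecationWarning)
value = d.get('old_key')  # returns 42 and emits the registered warning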
8 changes: 7 additions & 1 deletion sklearn/utils/estimator_checks.py
@@ -382,7 +382,13 @@ def set_checking_parameters(estimator):


class NotAnArray(object):
" An object that is convertable to an array"
"""An object that is convertible to an array
Parameters
----------
data : array_like
The data.
"""

def __init__(self, data):
self.data = data
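A sketch of what NotAnArray is for in the estimator checks (assuming the class exposes __array__, as it does in scikit-learn's estimator_checks):

import numpy as np
from sklearn.utils.estimator_checks import NotAnArray

wrapped = NotAnArray(np.arange(5))
np.asarray(wrapped)  # succeeds because conversion goes through __array__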
76 changes: 71 additions & 5 deletions sklearn/utils/extmath.py
@@ -40,8 +40,17 @@ def norm(x):
def squared_norm(x):
"""Squared Euclidean or Frobenius norm of x.
Returns the Euclidean norm when x is a vector, the Frobenius norm when x
is a matrix (2-d array). Faster than norm(x) ** 2.
Faster than norm(x) ** 2.
Parameters
----------
x : array_like
Returns
-------
float
The Euclidean norm when x is a vector, the Frobenius norm when x
is a matrix (2-d array).
"""
x = np.ravel(x, order='K')
if np.issubdtype(x.dtype, np.integer):
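A one-liner sketch of squared_norm on a vector:

import numpy as np
from sklearn.utils.extmath import squared_norm

v = np.array([3.0, 4.0])
squared_norm(v)  # 25.0, the squared Euclidean norm, with no square root taken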
@@ -58,6 +67,18 @@ def row_norms(X, squared=False):
matrices and does not create an X.shape-sized temporary.
Performs no input validation.
Parameters
----------
X : array_like
The input array
squared : bool, optional (default = False)
If True, return squared norms.
Returns
-------
array_like
The row-wise (squared) Euclidean norm of X.
"""
if sparse.issparse(X):
if not isinstance(X, sparse.csr_matrix):
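A sketch of row_norms on a dense array (it also accepts CSR matrices, per the docstring):

import numpy as np
from sklearn.utils.extmath import row_norms

X = np.array([[3.0, 4.0], [6.0, 8.0]])
row_norms(X)                # array([ 5., 10.])
row_norms(X, squared=True)  # array([ 25., 100.])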
@@ -76,6 +97,11 @@ def fast_logdet(A):
Equivalent to: np.log(nl.det(A)) but more robust.
It returns -Inf if det(A) is non-positive or not defined.
Parameters
----------
A : array_like
The matrix
"""
sign, ld = np.linalg.slogdet(A)
if not sign > 0:
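A sketch of fast_logdet on a positive-definite matrix:

import numpy as np
from sklearn.utils.extmath import fast_logdet

A = np.diag([2.0, 3.0])
fast_logdet(A)  # log(det(A)) = log(6), about 1.79; -inf when det(A) <= 0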
@@ -102,7 +128,15 @@ def fast_dot(a, b, out=None):
def density(w, **kwargs):
"""Compute density of a sparse vector
Return a value between 0 and 1
Parameters
----------
w : array_like
The sparse vector
Returns
-------
float
The density of w, between 0 and 1
"""
if hasattr(w, "toarray"):
d = float(w.nnz) / (w.shape[0] * w.shape[1])
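A sketch of density on a CSR input, exercising the toarray branch shown above:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import density

w = sparse.csr_matrix(np.array([[0.0, 1.0, 0.0, 2.0]]))
density(w)  # 0.5: two stored non-zeros out of four entries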
@@ -509,7 +543,12 @@ def svd_flip(u, v, u_based_decision=True):
Parameters
----------
u, v : ndarray
u : ndarray
u and v are the output of `linalg.svd` or
`sklearn.utils.extmath.randomized_svd`, with matching inner dimensions
so one can compute `np.dot(u * s, v)`.
v : ndarray
u and v are the output of `linalg.svd` or
`sklearn.utils.extmath.randomized_svd`, with matching inner dimensions
so one can compute `np.dot(u * s, v)`.
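A sketch of svd_flip applied to numpy's SVD output, per the parameter description above:

import numpy as np
from sklearn.utils.extmath import svd_flip

rng = np.random.RandomState(0)
A = rng.randn(4, 3)
u, s, v = np.linalg.svd(A, full_matrices=False)
u, v = svd_flip(u, v)  # signs are now deterministic; np.dot(u * s, v) still reconstructs A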
@@ -624,6 +663,15 @@ def safe_min(X):
Adapted from http://stackoverflow.com/q/13426580
Parameters
----------
X : array_like
The input array or sparse matrix
Returns
-------
float
    The min value of X
"""
if sparse.issparse(X):
if len(X.data) == 0:
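A sketch of safe_min on sparse input, where implicit zeros matter:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import safe_min

X = sparse.csr_matrix(np.array([[0, 1], [2, 3]]))
safe_min(X)  # 0: the stored minimum is 1, but the matrix is not fully dense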
@@ -635,7 +683,25 @@ def safe_min(X):


def make_nonnegative(X, min_value=0):
"""Ensure `X.min()` >= `min_value`."""
"""Ensure `X.min()` >= `min_value`.
Parameters
----------
X : array_like
The matrix to make non-negative
min_value : float
The threshold value
Returns
-------
array_like
The thresholded array
Raises
------
ValueError
    When X is sparse and its minimum is below ``min_value``
"""
min_ = safe_min(X)
if min_ < min_value:
if sparse.issparse(X):
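A sketch of make_nonnegative on a dense array (sparse input raises, as documented above):

import numpy as np
from sklearn.utils.extmath import make_nonnegative

X = np.array([[-2.0, 1.0], [0.0, 3.0]])
make_nonnegative(X)  # every entry shifted by +2 so the minimum becomes 0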
11 changes: 10 additions & 1 deletion sklearn/utils/fixes.py
@@ -199,7 +199,16 @@ def _argmax(arr_or_matrix, axis=None):


def parallel_helper(obj, methodname, *args, **kwargs):
"""Workaround for Python 2 limitations of pickling instance methods"""
"""Workaround for Python 2 limitations of pickling instance methods
Parameters
----------
obj : object
    The instance whose method is called.
methodname : str
    The name of the method to call on ``obj``.
*args : positional arguments
    Passed on to the method.
**kwargs : keyword arguments
    Passed on to the method.
"""
return getattr(obj, methodname)(*args, **kwargs)
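A sketch of parallel_helper with the vendored joblib of this era (the sklearn.externals.joblib path is an assumption tied to 2018-vintage scikit-learn):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed
from sklearn.utils.fixes import parallel_helper

arrays = [np.arange(3), np.arange(4)]
# Call the bound method `sum` of each object; the helper keeps it picklable on Python 2.
sums = Parallel(n_jobs=2)(delayed(parallel_helper)(a, 'sum') for a in arrays)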


52 changes: 50 additions & 2 deletions sklearn/utils/mocking.py
@@ -6,6 +6,11 @@


class ArraySlicingWrapper(object):
"""
Parameters
----------
array : array_like
    The array to wrap and slice.
"""
def __init__(self, array):
self.array = array

@@ -14,8 +19,12 @@ def __getitem__(self, aslice):


class MockDataFrame(object):

# have shape an length but don't support indexing.
"""
Parameters
----------
array : array_like
    The underlying array, also exposed as ``values``.
"""
# have shape and length but don't support indexing.
def __init__(self, array):
self.array = array
self.values = array
@@ -46,6 +55,13 @@ class CheckingClassifier(BaseEstimator, ClassifierMixin):
Checks some property of X and y in fit / predict.
This allows testing whether pipelines / cross-validation or metaestimators
changed the input.
Parameters
----------
check_y : callable, optional (default=None)
    Checking callable applied to y in ``fit``; must return a truthy value.
check_X : callable, optional (default=None)
    Checking callable applied to X in ``fit`` and ``predict``.
foo_param : int, optional (default=0)
    Dummy hyper-parameter; ``score`` returns 1. when it is greater than 1.
expected_fit_params : list of str, optional (default=None)
    Names of the fit parameters that must be passed to ``fit``.
"""
def __init__(self, check_y=None, check_X=None, foo_param=0,
expected_fit_params=None):
@@ -55,6 +71,22 @@ def __init__(self, check_y=None, check_X=None, foo_param=0,
self.expected_fit_params = expected_fit_params

def fit(self, X, y, **fit_params):
"""
Fit classifier
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples] or [n_samples, n_output], optional
Target relative to X for classification or regression;
None for unsupervised learning.
**fit_params : dict of string -> object
Parameters passed to the ``fit`` method of the estimator
"""
assert_true(len(X) == len(y))
if self.check_X is not None:
assert_true(self.check_X(X))
@@ -74,11 +106,27 @@ def fit(self, X, y, **fit_params):
return self

def predict(self, T):
"""
Parameters
----------
T : indexable, length n_samples
"""
if self.check_X is not None:
assert_true(self.check_X(T))
return self.classes_[np.zeros(_num_samples(T), dtype=np.int)]

def score(self, X=None, Y=None):
"""
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Input data, where n_samples is the number of samples and
n_features is the number of features.
Y : array-like, shape = [n_samples] or [n_samples, n_output], optional
Target relative to X for classification or regression;
None for unsupervised learning.
"""
if self.foo_param > 1:
score = 1.
else:
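A sketch of CheckingClassifier exercising check_X at both fit and predict time:

import numpy as np
from sklearn.utils.mocking import CheckingClassifier

X = np.ones((10, 2))
y = np.zeros(10)
clf = CheckingClassifier(check_X=lambda X: X.shape[1] == 2)
clf.fit(X, y).predict(X)  # check_X must return something truthy or the assert fires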