DOC add test for numpydoc validation and documented param/attributes (#869)
scikit-learn-contrib · Oct 20, 2021 · 58f4a11 · 58f4a11
1 parent 56eefdf
commit 58f4a11
Show file tree

Hide file tree

Showing 35 changed files with 774 additions and 76 deletions.
diff --git a/build_tools/azure/install.cmd b/build_tools/azure/install.cmd
@@ -11,7 +11,7 @@ IF "%PYTHON_ARCH%"=="64" (
     call deactivate
     @rem Clean up any left-over from a previous build
     conda remove --all -q -y -n %VIRTUALENV%
-    conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git
+    conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git -c conda-forge
 
     call activate %VIRTUALENV%
 

diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
@@ -7,7 +7,7 @@ UNAMESTR=`uname`
 
 make_conda() {
     TO_INSTALL="$@"
-    conda create -n $VIRTUALENV --yes $TO_INSTALL
+    conda create -n $VIRTUALENV --yes $TO_INSTALL -c conda-forge
     source activate $VIRTUALENV
 }
 
@@ -65,7 +65,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
     fi
 
     if [[ -n "$TO_INSTALL" ]]; then
-       conda install --yes $TO_INSTALL
+       conda install --yes $TO_INSTALL -c conda-forge
     fi
 
     if [[ -n "$KERAS_VERSION" ]]; then

diff --git a/imblearn/base.py b/imblearn/base.py
@@ -140,6 +140,24 @@ def _identity(X, y):
     return X, y
 
 
+def is_sampler(estimator):
+    """Return True if the given estimator is a sampler, False otherwise.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator to test.
+
+    Returns
+    -------
+    is_sampler : bool
+        True if estimator is a sampler, otherwise False.
+    """
+    if estimator._estimator_type == "sampler":
+        return True
+    return False
+
+
 class FunctionSampler(BaseSampler):
     """Construct a sampler from calling an arbitrary callable.
 
@@ -166,9 +184,20 @@ class FunctionSampler(BaseSampler):
 
         .. versionadded:: 0.6
 
+    Attributes
+    ----------
+    sampling_strategy_ : dict
+        Dictionary containing the information to sample the dataset. The keys
+        correspond to the class labels from which to sample and the values
+        are the number of samples to sample.
+
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
-
     sklearn.preprocessing.FunctionTransfomer : Stateless transformer.
 
     Notes

diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py
@@ -49,6 +49,25 @@ class SMOTEENN(BaseSampler):
 
     {n_jobs}
 
+    Attributes
+    ----------
+    sampling_strategy_ : dict
+        Dictionary containing the information to sample the dataset. The keys
+        correspond to the class labels from which to sample and the values
+        are the number of samples to sample.
+
+    smote_ : sampler object
+        The validated :class:`~imblearn.over_sampling.SMOTE` instance.
+
+    enn_ : sampler object
+        The validated :class:`~imblearn.under_sampling.EditedNearestNeighbours`
+        instance.
+
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
     SMOTETomek : Over-sample using SMOTE followed by under-sampling removing

diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py
@@ -49,6 +49,24 @@ class SMOTETomek(BaseSampler):
 
     {n_jobs}
 
+    Attributes
+    ----------
+    sampling_strategy_ : dict
+        Dictionary containing the information to sample the dataset. The keys
+        correspond to the class labels from which to sample and the values
+        are the number of samples to sample.
+
+    smote_ : sampler object
+        The validated :class:`~imblearn.over_sampling.SMOTE` instance.
+
+    tomek_ : sampler object
+        The validated :class:`~imblearn.under_sampling.TomekLinks` instance.
+
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
     SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited

diff --git a/imblearn/datasets/_imbalance.py b/imblearn/datasets/_imbalance.py
@@ -16,8 +16,7 @@
 def make_imbalance(
     X, y, *, sampling_strategy=None, random_state=None, verbose=False, **kwargs
 ):
-    """Turns a dataset into an imbalanced dataset with a specific sampling
-    strategy.
+    """Turn a dataset into an imbalanced dataset with a specific sampling strategy.
 
     A simple toy dataset to visualize clustering and classification
     algorithms.
@@ -52,7 +51,7 @@ def make_imbalance(
     verbose : bool, default=False
         Show information regarding the sampling.
 
-    kwargs : dict
+    **kwargs : dict
         Dictionary of additional keyword arguments to pass to
         ``sampling_strategy``.
 
@@ -62,7 +61,7 @@ def make_imbalance(
         The array containing the imbalanced data.
 
     y_resampled : ndarray of shape (n_samples_new)
-        The corresponding label of `X_resampled`
+        The corresponding label of `X_resampled`.
 
     Notes
     -----

diff --git a/imblearn/datasets/_zenodo.py b/imblearn/datasets/_zenodo.py
@@ -38,7 +38,6 @@
 .. [1] Ding, Zejin, "Diversified Ensemble Classifiers for Highly
    Imbalanced Data Learning and their Application in Bioinformatics."
    Dissertation, Georgia State University, (2011).
-
 """
 
 # Author: Guillaume Lemaitre
@@ -147,12 +146,12 @@ def fetch_datasets(
         The ordered is defined by ``filter_data``. Each Bunch object ---
         referred as dataset --- have the following attributes:
 
-    dataset.data : ndarray of shape (n_samples, n_features)
+        dataset.data : ndarray of shape (n_samples, n_features)
 
-    dataset.target : ndarray of shape (n_samples,)
+        dataset.target : ndarray of shape (n_samples,)
 
-    dataset.DESCR : str
-        Description of the each dataset.
+        dataset.DESCR : str
+            Description of each dataset.
 
     Notes
     -----

diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py
@@ -111,6 +111,9 @@ class BalancedBaggingClassifier(BaggingClassifier):
     estimators_ : list of estimators
         The collection of fitted base estimators.
 
+    sampler_ : sampler object
+        The validated sampler created from the `sampler` parameter.
+
     estimators_samples_ : list of ndarray
         The subset of drawn samples (i.e., the in-bag samples) for each base
         estimator. Each subset is defined by a boolean mask.
@@ -133,6 +136,11 @@ class BalancedBaggingClassifier(BaggingClassifier):
         was never left out during the bootstrap. In this case,
         ``oob_decision_function_`` might contain NaN.
 
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
     BalancedRandomForestClassifier : Random forest applying random-under

diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py
@@ -73,12 +73,23 @@ class EasyEnsembleClassifier(BaggingClassifier):
     estimators_ : list of estimators
         The collection of fitted base estimators.
 
+    estimators_samples_ : list of arrays
+        The subset of drawn samples for each base estimator.
+
+    estimators_features_ : list of arrays
+        The subset of drawn features for each base estimator.
+
     classes_ : array, shape (n_classes,)
         The classes labels.
 
     n_classes_ : int or list
         The number of classes.
 
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
     BalancedBaggingClassifier : Bagging classifier for which each base

diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py
@@ -230,10 +230,17 @@ class BalancedRandomForestClassifier(RandomForestClassifier):
 
     Attributes
     ----------
-    estimators_ : list of DecisionTreeClassifier
+    base_estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier` instance
+        The child estimator template used to create the collection of fitted
+        sub-estimators.
+
+    estimators_ : list of :class:`~sklearn.tree.DecisionTreeClassifier`
         The collection of fitted sub-estimators.
 
-    samplers_ : list of RandomUnderSampler
+    base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
+        The base sampler used to construct the subsequent list of samplers.
+
+    samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
         The collection of fitted samplers.
 
     pipelines_ : list of Pipeline.
@@ -250,6 +257,11 @@ class labels (multi-output problem).
     n_features_ : int
         The number of features when ``fit`` is performed.
 
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     n_outputs_ : int
         The number of outputs when ``fit`` is performed.
 
@@ -628,7 +640,7 @@ def _set_oob_score(self, X, y):
     @property
     def n_features_(self):
         """Number of features when fitting the estimator."""
-        return getattr(self.n_features_in_, self._n_features)
+        return getattr(self.n_features_in_, "n_features_", self._n_features)
 
     def _more_tags(self):
         return {

diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
@@ -68,7 +68,10 @@ class RUSBoostClassifier(AdaBoostClassifier):
     estimators_ : list of classifiers
         The collection of fitted sub-estimators.
 
-    samplers_ : list of RandomUnderSampler
+    base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
+        The base sampler used to generate the subsequent samplers.
+
+    samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
         The collection of fitted samplers.
 
     pipelines_ : list of Pipeline
@@ -90,6 +93,11 @@ class RUSBoostClassifier(AdaBoostClassifier):
     feature_importances_ : ndarray of shape (n_features,)
         The feature importances if supported by the ``base_estimator``.
 
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.9
+
     See Also
     --------
     BalancedBaggingClassifier : Bagging classifier for which each base

diff --git a/imblearn/exceptions.py b/imblearn/exceptions.py
@@ -8,6 +8,24 @@
 
 
 def raise_isinstance_error(variable_name, possible_type, variable):
+    """Raise consistent error message for isinstance() function.
+
+    Parameters
+    ----------
+    variable_name : str
+        The name of the variable.
+
+    possible_type : type
+        The possible type of the variable.
+
+    variable : object
+        The variable to check.
+
+    Raises
+    ------
+    ValueError
+        If the instance is not of the possible type.
+    """
     raise ValueError(
         f"{variable_name} has to be one of {possible_type}. "
         f"Got {type(variable)} instead."

diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
@@ -15,9 +15,9 @@ def import_keras():
 
     def import_from_keras():
         try:
-            import keras
+            import keras  # noqa
 
-            return (keras.utils.Sequence,), True
+            return (keras.utils.data_utils.Sequence,), True
         except ImportError:
             return tuple(), False
 
@@ -33,7 +33,10 @@ def import_from_tensforflow():
     ParentClassTensorflow, has_keras_tf = import_from_tensforflow()
     has_keras = has_keras_k or has_keras_tf
     if has_keras:
-        ParentClass = ParentClassKeras + ParentClassTensorflow
+        if has_keras_tf:
+            ParentClass = ParentClassTensorflow
+        else:
+            ParentClass = ParentClassKeras
     else:
         ParentClass = (object,)
     return ParentClass, has_keras