From 5f7ea42a70610fdb2503ff053b632168d78ab09d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 31 Jul 2016 20:05:23 +0200 Subject: [PATCH 1/2] PEP8 and doc for make_imbalance --- doc/api.rst | 18 ++++++++++++++++++ imblearn/datasets/imbalance.py | 20 ++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index bd41a6d4c..daf1919ce 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -118,3 +118,21 @@ Functions pipeline.make_pipeline + +.. _datasets_ref: + +Datasets +======== + +.. automodule:: imblearn.datasets + :no-members: + :no-inherited-members: + +.. currentmodule:: imblearn + +Functions +--------- +.. autosummary:: + :toctree: generated/ + + datasets.make_imbalance diff --git a/imblearn/datasets/imbalance.py b/imblearn/datasets/imbalance.py index dcae11c94..9e5c34fa5 100644 --- a/imblearn/datasets/imbalance.py +++ b/imblearn/datasets/imbalance.py @@ -7,6 +7,7 @@ from sklearn.utils import check_X_y from sklearn.utils import check_random_state + def make_imbalance(X, y, ratio, min_c_=None, random_state=None): """Turns a dataset into an imbalanced dataset at specific ratio. A simple toy dataset to visualize clustering and classification @@ -20,10 +21,10 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): y : ndarray, shape (n_samples, ) Corresponding label for each sample in X. - ratio : float, - The desired ratio given by the number of samples in - the minority class over the the number of samples in - the majority class. + ratio : float, + The desired ratio given by the number of samples in + the minority class over the number of samples in + the majority class. Thus the ratio should be in the open interval (0., 1.) min_c_ : str or int, optional (default=None) The identifier of the class to be the minority class. 
@@ -42,6 +43,7 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): y_resampled : ndarray, shape (n_samples_new) The corresponding label of `X_resampled` + """ if ratio <= 0.0 or ratio >= 1.0: raise ValueError('ratio value must be such that 0.0 < ratio < 1.0') @@ -52,12 +54,16 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): stats_c_ = Counter(y) + self.logger.info('The original target distribution in the dataset is: %s', + stats_c_) + if min_c_ is None: min_c_ = min(stats_c_, key=stats_c_.get) n_min_samples = int(np.count_nonzero(y != min_c_) * ratio) if n_min_samples > stats_c_[min_c_]: - raise ValueError('Current imbalance ratio of data is lower than desired ratio!') + raise ValueError('Current imbalance ratio of data is lower than' + ' desired ratio!') if n_min_samples == 0: raise ValueError('Not enough samples for desired ratio!') @@ -68,7 +74,9 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): idx_min = random_state.choice(idx_min, size=n_min_samples, replace=False) idx = np.concatenate((idx_min, idx_maj), axis=0) - X_resampled, y_resampled = X[idx,:], y[idx] + X_resampled, y_resampled = X[idx, :], y[idx] + + self.logger.info('Make the dataset imbalanced: %s', Counter(y_resampled)) return X_resampled, y_resampled From cd8c1be25b4fb86261589428e4f8d91c45368fc3 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 31 Jul 2016 20:09:18 +0200 Subject: [PATCH 2/2] Add logger for the module --- imblearn/datasets/imbalance.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/imblearn/datasets/imbalance.py b/imblearn/datasets/imbalance.py index 9e5c34fa5..3ea89a892 100644 --- a/imblearn/datasets/imbalance.py +++ b/imblearn/datasets/imbalance.py @@ -1,5 +1,7 @@ """Transform a dataset into an imbalanced dataset.""" +import logging + import numpy as np from collections import Counter @@ -7,6 +9,8 @@ from sklearn.utils import check_X_y from sklearn.utils import check_random_state +LOGGER 
= logging.getLogger(__name__) + def make_imbalance(X, y, ratio, min_c_=None, random_state=None): """Turns a dataset into an imbalanced dataset at specific ratio. @@ -54,8 +58,8 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): stats_c_ = Counter(y) - self.logger.info('The original target distribution in the dataset is: %s', - stats_c_) + LOGGER.info('The original target distribution in the dataset is: %s', + stats_c_) if min_c_ is None: min_c_ = min(stats_c_, key=stats_c_.get) @@ -76,7 +80,7 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None): X_resampled, y_resampled = X[idx, :], y[idx] - self.logger.info('Make the dataset imbalanced: %s', Counter(y_resampled)) + LOGGER.info('Make the dataset imbalanced: %s', Counter(y_resampled)) return X_resampled, y_resampled