6 changes: 6 additions & 0 deletions imblearn/over_sampling/_random_over_sampler.py
@@ -119,3 +119,9 @@ def _fit_resample(self, X, y):
safe_indexing(y, sample_indices), sample_indices)
return (safe_indexing(X, sample_indices),
safe_indexing(y, sample_indices))

def _more_tags(self):
# TODO: remove the str tag once the following PR is merged:
# https://github.com/scikit-learn/scikit-learn/pull/14043
return {'X_types': ['2darray', 'str', 'string'],
'sample_indices': True}
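
For context, the sample_indices and X_types entries added here flow through scikit-learn's estimator-tag machinery: _get_tags() merges every _more_tags() override along the class hierarchy on top of the default tags. A minimal, self-contained sketch of that merging, with illustrative class names rather than imblearn's real base classes:

# Sketch of the tag-merging mechanism assumed by the _more_tags overrides
# above; class names are illustrative, not imblearn's actual hierarchy.
_DEFAULT_TAGS = {'X_types': ['2darray'], 'sample_indices': False}


class BaseSamplerSketch:
    def _more_tags(self):
        return {}

    def _get_tags(self):
        tags = dict(_DEFAULT_TAGS)
        # walk the MRO from the most generic class to the most specific one
        for klass in reversed(type(self).__mro__):
            if hasattr(klass, '_more_tags'):
                tags.update(klass._more_tags(self))
        return tags


class RandomOverSamplerSketch(BaseSamplerSketch):
    def _more_tags(self):
        return {'X_types': ['2darray', 'str', 'string'],
                'sample_indices': True}


print(RandomOverSamplerSketch()._get_tags())
# {'X_types': ['2darray', 'str', 'string'], 'sample_indices': True}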
@@ -170,3 +170,6 @@ def _fit_resample(self, X, y):
y_resampled = np.hstack(y_resampled)

return X_resampled, np.array(y_resampled, dtype=y.dtype)

def _more_tags(self):
return {'sample_indices': False}
@@ -220,3 +220,6 @@ def _fit_resample(self, X, y):
return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
idx_under)
return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

def _more_tags(self):
return {'sample_indices': True}
@@ -186,6 +186,9 @@ def _fit_resample(self, X, y):
idx_under)
return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

def _more_tags(self):
return {'sample_indices': True}


@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
@@ -377,6 +380,9 @@ def _fit_resample(self, X, y):
return X_resampled, y_resampled, self.sample_indices_
return X_resampled, y_resampled

def _more_tags(self):
return {'sample_indices': True}


@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
@@ -564,3 +570,6 @@ def _fit_resample(self, X, y):
if self.return_indices:
return X_resampled, y_resampled, self.sample_indices_
return X_resampled, y_resampled

def _more_tags(self):
return {'sample_indices': True}
@@ -187,3 +187,6 @@ def _fit_resample(self, X, y):
return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
idx_under)
return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

def _more_tags(self):
return {'sample_indices': True}
3 changes: 3 additions & 0 deletions imblearn/under_sampling/_prototype_selection/_nearmiss.py
@@ -293,3 +293,6 @@ def _fit_resample(self, X, y):
return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
idx_under)
return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

def _more_tags(self):
return {'sample_indices': True}
@@ -204,3 +204,6 @@ def _fit_resample(self, X, y):
self.sample_indices_)
return (safe_indexing(X, self.sample_indices_),
safe_indexing(y, self.sample_indices_))

def _more_tags(self):
return {'sample_indices': True}
@@ -189,3 +189,6 @@ def _fit_resample(self, X, y):
if self.return_indices:
return (X_cleaned, y_cleaned, self.sample_indices_)
return X_cleaned, y_cleaned

def _more_tags(self):
return {'sample_indices': True}
@@ -135,3 +135,9 @@ def _fit_resample(self, X, y):
return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
idx_under)
return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

def _more_tags(self):
# TODO: remove the str tag once the following PR is merged:
# https://github.com/scikit-learn/scikit-learn/pull/14043
return {'X_types': ['2darray', 'str', 'string'],
'sample_indices': True}
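
The 'str'/'string' X_types entries above advertise that the random samplers only index rows and never do arithmetic on X, so string-valued feature matrices can pass through unchanged. A usage sketch under that assumption:

import numpy as np
from imblearn.under_sampling import RandomUnderSampler

# String features are only selected by row index, never cast to numbers.
X = np.array([['cat', 'small'], ['cat', 'big'], ['dog', 'small'],
              ['dog', 'big'], ['dog', 'medium']], dtype=object)
y = np.array([0, 0, 1, 1, 1])

X_res, y_res = RandomUnderSampler(random_state=0).fit_resample(X, y)
# The majority class (1) is undersampled to the minority count: 2 rows each.
print(X_res, y_res)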
3 changes: 3 additions & 0 deletions imblearn/under_sampling/_prototype_selection/_tomek_links.py
@@ -166,3 +166,6 @@ def _fit_resample(self, X, y):
self.sample_indices_)
return (safe_indexing(X, self.sample_indices_),
safe_indexing(y, self.sample_indices_))

def _more_tags(self):
return {'sample_indices': True}
37 changes: 3 additions & 34 deletions imblearn/utils/estimator_checks.py
@@ -34,40 +34,12 @@
from imblearn.under_sampling import NearMiss, ClusterCentroids

DONT_SUPPORT_RATIO = ['SVMSMOTE', 'BorderlineSMOTE']
SUPPORT_STRING = ['RandomUnderSampler', 'RandomOverSampler']
HAVE_SAMPLE_INDICES = [
'RandomOverSampler', 'RandomUnderSampler', 'InstanceHardnessThreshold',
'NearMiss', 'TomekLinks', 'EditedNearestNeighbours',
'RepeatedEditedNearestNeighbours', 'AllKNN', 'OneSidedSelection',
'CondensedNearestNeighbour', 'NeighbourhoodCleaningRule']
# FIXME: remove in 0.6
DONT_HAVE_RANDOM_STATE = ('NearMiss', 'EditedNearestNeighbours',
'RepeatedEditedNearestNeighbours', 'AllKNN',
'NeighbourhoodCleaningRule', 'TomekLinks')


def monkey_patch_check_dtype_object(name, estimator_orig):
# check that estimators treat dtype object as numeric if possible
rng = np.random.RandomState(0)
X = rng.rand(40, 10).astype(object)
y = np.array([0] * 10 + [1] * 30, dtype=np.int)
estimator = clone(estimator_orig)
estimator.fit(X, y)

try:
estimator.fit(X, y.astype(object))
except Exception as e:
if "Unknown label type" not in str(e):
raise

if name not in SUPPORT_STRING:
X[0, 0] = {'foo': 'bar'}
msg = "argument must be a string or a number"
assert_raises_regex(TypeError, msg, estimator.fit, X, y)
else:
estimator.fit(X, y)


def _yield_sampler_checks(name, Estimator):
yield check_target_type
yield check_samplers_one_label
@@ -106,10 +78,6 @@ def check_estimator(Estimator, run_sampler_tests=True):
Whether or not to run the sampler tests.
"""
name = Estimator.__name__
# monkey patch check_dtype_object for the sampler allowing strings
import sklearn.utils.estimator_checks
sklearn.utils.estimator_checks.check_dtype_object = \
monkey_patch_check_dtype_object
# scikit-learn common tests
sklearn_check_estimator(Estimator)
check_parameters_default_constructible(name, Estimator)
@@ -369,7 +337,8 @@ def check_samplers_sample_indices(name, Sampler):
weights=[0.2, 0.3, 0.5], random_state=0)
sampler = Sampler()
sampler.fit_resample(X, y)
if name in HAVE_SAMPLE_INDICES:
assert hasattr(sampler, 'sample_indices_')
sample_indices = sampler._get_tags().get('sample_indices', None)
if sample_indices:
assert hasattr(sampler, 'sample_indices_') is sample_indices
else:
assert not hasattr(sampler, 'sample_indices_')
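
The rewritten check derives its expectation from the estimator tags instead of the hard-coded HAVE_SAMPLE_INDICES list. A standalone sketch of the same logic, using toy stand-ins rather than real samplers:

import numpy as np


class WithIndices:
    def _get_tags(self):
        return {'sample_indices': True}

    def fit_resample(self, X, y):
        # samplers that expose indices set sample_indices_ during resampling
        self.sample_indices_ = np.arange(len(y))
        return X, y


class WithoutIndices:
    def _get_tags(self):
        return {'sample_indices': False}

    def fit_resample(self, X, y):
        return X, y


def check_sample_indices(sampler, X, y):
    sampler.fit_resample(X, y)
    if sampler._get_tags().get('sample_indices', False):
        assert hasattr(sampler, 'sample_indices_')
    else:
        assert not hasattr(sampler, 'sample_indices_')


X, y = np.ones((4, 2)), np.array([0, 0, 1, 1])
check_sample_indices(WithIndices(), X, y)
check_sample_indices(WithoutIndices(), X, y)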