Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Bug fixes

- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the right samples during under sampling for the method 3. By `Guillaume Lemaitre`_.
- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the `random_state` generation. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
- Fixed a bug in :class:`under_sampling.AllKNN`: added stopping criteria to prevent the minority class from becoming a majority class or a class from disappearing. By `Guillaume Lemaitre`_.

New features
~~~~~~~~~~~~
Expand Down
38 changes: 35 additions & 3 deletions imblearn/under_sampling/edited_nearest_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,42 @@ def _sample(self, X, y):
# updating ENN size_ngh
self.enn_.size_ngh = curr_size_ngh
if self.return_indices:
X_, y_, idx_ = self.enn_.fit_sample(X_, y_)
idx_under = idx_under[idx_]
X_enn, y_enn, idx_enn = self.enn_.fit_sample(X_, y_)
else:
X_, y_ = self.enn_.fit_sample(X_, y_)
X_enn, y_enn = self.enn_.fit_sample(X_, y_)

# Check the stopping criterion
# 1. If the number of samples in one of the other classes becomes lower
# than the number of samples in the minority class
# 2. If one of the classes disappears
# Case 1
stats_enn = Counter(y_enn)
self.logger.debug('Current ENN stats: %s', stats_enn)
# Get the number of samples in the non-minority classes
count_non_min = np.array([val for val, key
in zip(stats_enn.itervalues(),
stats_enn.iterkeys())
if key != self.min_c_])
self.logger.debug('Number of samples in the non-majority'
' classes: %s', count_non_min)
# Check whether the minority class stops being the minority
b_min_bec_maj = np.any(count_non_min < self.stats_c_[self.min_c_])

# Case 2
b_remove_maj_class = (len(stats_enn) < len(self.stats_c_))

if b_min_bec_maj or b_remove_maj_class:
# Log the variables to explain the stop of the algorithm
self.logger.debug('AllKNN minority become majority: %s',
b_min_bec_maj)
self.logger.debug('AllKNN remove one class: %s',
b_remove_maj_class)
break

# Update the data for the next iteration
X_, y_, = X_enn, y_enn
if self.return_indices:
idx_under = idx_under[idx_enn]

self.logger.info('Under-sampling performed: %s', Counter(y_))

Expand Down
6 changes: 3 additions & 3 deletions imblearn/under_sampling/tests/test_allknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,6 @@ def test_multiclass_fit_sample():

# Check the size of y
count_y_res = Counter(y_resampled)
assert_equal(count_y_res[0], 341)
assert_equal(count_y_res[1], 2485)
assert_equal(count_y_res[2], 212)
assert_equal(count_y_res[0], 400)
assert_equal(count_y_res[1], 3600)
assert_equal(count_y_res[2], 1000)