Skip to content

Commit 4e885ae

Browse files
loskutyan and glemaitre
authored and committed
[MRG] NeighbourhoodCleaningRule bug fix #227 (#230)
* main fix: selecting neighbours instead of minority class misclassified samples
* tests fix: 5-th sample additionally removed by new (fixed) rule
* docstring fix: 2 more example samples removed by new (fixed) rule
* fixed too long line
* changelog
* changelog: target name added
1 parent 079dc44 commit 4e885ae

File tree

3 files changed

+13
-11
lines changed

3 files changed

+13
-11
lines changed

doc/whats_new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Bug fixes
2424
- Fixed a bug in :class:`pipeline.Pipeline`, making it possible to embed a `Pipeline` in another `Pipeline`. By `Christos Aridas`_ .
2525
- Fixed a bug in :class:`pipeline.Pipeline`, solving the issue of putting two samplers in the same `Pipeline`. By `Christos Aridas`_ .
2626
- Fixed a bug in :class:`under_sampling.CondensedNearestNeighbour`, correction of the shape of `sel_x` when only one sample is selected. By `Aliaksei Halachkin`_.
27+
- Fixed a bug in :class:`under_sampling.NeighbourhoodCleaningRule`, selecting neighbours instead of minority class misclassified samples. By `Aleksandr Loskutov`_.
2728

2829
New features
2930
~~~~~~~~~~~~
@@ -117,3 +118,4 @@ New methods
117118
.. _Dayvid Oliveira: https://github.com/dvro
118119
.. _Francois Magimel: https://github.com/Linkid
119120
.. _Aliaksei Halachkin: https://github.com/honeyext
121+
.. _Aleksandr Loskutov: https://github.com/loskutyan

imblearn/under_sampling/neighbourhood_cleaning_rule.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class NeighbourhoodCleaningRule(BaseMulticlassSampler):
7777
>>> ncr = NeighbourhoodCleaningRule(random_state=42)
7878
>>> X_res, y_res = ncr.fit_sample(X, y)
7979
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
80-
Resampled dataset shape Counter({1: 891, 0: 100})
80+
Resampled dataset shape Counter({1: 889, 0: 100})
8181
8282
References
8383
----------
@@ -199,8 +199,8 @@ def _sample(self, X, y):
199199
# If the minority class remove the majority samples
200200
if key == self.min_c_:
201201
# Get the index to exclude
202-
idx_to_exclude += nnhood_idx[np.nonzero(nnhood_label[
203-
np.flatnonzero(nnhood_bool)])].tolist()
202+
idx_to_exclude += nnhood_idx[np.nonzero(np.logical_not(
203+
nnhood_label[np.flatnonzero(nnhood_bool)]))].tolist()
204204
else:
205205
# Get the index to exclude
206206
idx_to_exclude += idx_sub_sample[np.nonzero(

imblearn/under_sampling/tests/test_neighbourhood_cleaning_rule.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ def test_ncr_fit_sample():
8282
X_resampled, y_resampled = ncr.fit_sample(X, Y)
8383

8484
X_gt = np.array([[-1.20809175, -1.49917302], [-0.60497017, -0.66630228],
85-
[-0.91735824, 0.93110278], [-0.20413357, 0.64628718],
85+
[-0.91735824, 0.93110278],
8686
[0.35967591, 2.61186964], [-1.55581933, 1.09609604],
8787
[1.55157493, -1.6981518]])
88-
y_gt = np.array([0, 0, 1, 1, 2, 1, 2])
88+
y_gt = np.array([0, 0, 1, 2, 1, 2])
8989
assert_array_equal(X_resampled, X_gt)
9090
assert_array_equal(y_resampled, y_gt)
9191

@@ -98,11 +98,11 @@ def test_ncr_fit_sample_with_indices():
9898
X_resampled, y_resampled, idx_under = ncr.fit_sample(X, Y)
9999

100100
X_gt = np.array([[-1.20809175, -1.49917302], [-0.60497017, -0.66630228],
101-
[-0.91735824, 0.93110278], [-0.20413357, 0.64628718],
101+
[-0.91735824, 0.93110278],
102102
[0.35967591, 2.61186964], [-1.55581933, 1.09609604],
103103
[1.55157493, -1.6981518]])
104-
y_gt = np.array([0, 0, 1, 1, 2, 1, 2])
105-
idx_gt = np.array([10, 11, 3, 5, 7, 13, 14])
104+
y_gt = np.array([0, 0, 1, 2, 1, 2])
105+
idx_gt = np.array([10, 11, 3, 7, 13, 14])
106106
assert_array_equal(X_resampled, X_gt)
107107
assert_array_equal(y_resampled, y_gt)
108108
assert_array_equal(idx_under, idx_gt)
@@ -139,11 +139,11 @@ def test_ncr_fit_sample_nn_obj():
139139
X_resampled, y_resampled, idx_under = ncr.fit_sample(X, Y)
140140

141141
X_gt = np.array([[-1.20809175, -1.49917302], [-0.60497017, -0.66630228],
142-
[-0.91735824, 0.93110278], [-0.20413357, 0.64628718],
142+
[-0.91735824, 0.93110278],
143143
[0.35967591, 2.61186964], [-1.55581933, 1.09609604],
144144
[1.55157493, -1.6981518]])
145-
y_gt = np.array([0, 0, 1, 1, 2, 1, 2])
146-
idx_gt = np.array([10, 11, 3, 5, 7, 13, 14])
145+
y_gt = np.array([0, 0, 1, 2, 1, 2])
146+
idx_gt = np.array([10, 11, 3, 7, 13, 14])
147147
assert_array_equal(X_resampled, X_gt)
148148
assert_array_equal(y_resampled, y_gt)
149149
assert_array_equal(idx_under, idx_gt)

0 commit comments

Comments
 (0)