Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions imblearn/under_sampling/one_sided_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class OneSidedSelection(BaseBinarySampler):
>>> oss = OneSidedSelection(random_state=42)
>>> X_res, y_res = oss.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 496, 0: 100})
Resampled dataset shape Counter({1: 495, 0: 100})

References
----------
Expand Down Expand Up @@ -192,21 +192,23 @@ def _sample(self, X, y):

# Randomly draw n_seeds_S seed samples from the majority class
# Generate the index to select
idx_maj_sample = random_state.randint(
low=0, high=self.stats_c_[key], size=self.n_seeds_S)
maj_sample = X[y == key][idx_maj_sample]
idx_maj = np.flatnonzero(y == key)
idx_maj_sample = idx_maj[
random_state.randint(
low=0,
high=self.stats_c_[key],
size=self.n_seeds_S)]
maj_sample = X[idx_maj_sample]

# Create the set C
C_x = np.append(X_min, maj_sample, axis=0)
C_y = np.append(y_min, [key] * self.n_seeds_S)

# Create the set S
S_x = X[y == key]
S_y = y[y == key]

# Remove the seeds from S since they will be added anyway
S_x = np.delete(S_x, idx_maj_sample, axis=0)
S_y = np.delete(S_y, idx_maj_sample, axis=0)
# Create the set S by removing the seeds from the majority samples,
# since they will be added anyway
idx_maj_extracted = np.delete(idx_maj, idx_maj_sample, axis=0)
S_x = X[idx_maj_extracted]
S_y = y[idx_maj_extracted]

# Fit C into the knn
self.estimator_.fit(C_x, C_y)
Expand All @@ -222,8 +224,7 @@ def _sample(self, X, y):
# We concatenate the misclassified samples with the seed and the
# minority samples
if self.return_indices:
idx_tmp = np.flatnonzero(
y == key)[np.flatnonzero(pred_S_y != S_y)]
idx_tmp = idx_maj_extracted[np.flatnonzero(pred_S_y != S_y)]
idx_under = np.concatenate(
(idx_under, idx_maj_sample, idx_tmp), axis=0)

Expand Down
2 changes: 1 addition & 1 deletion imblearn/under_sampling/tests/test_one_sided_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_oss_fit_sample_with_indices():
[-0.43877303, 1.07366684], [-0.85795321, 0.82980738],
[-0.30126957, -0.66268378], [0.20246714, -0.34727125]])
y_gt = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
idx_gt = np.array([0, 3, 9, 12, 13, 14, 1, 2, 5, 6, 7, 10])
idx_gt = np.array([0, 3, 9, 12, 13, 14, 1, 2, 5, 6, 8, 11])
assert_array_equal(X_resampled, X_gt)
assert_array_equal(y_resampled, y_gt)
assert_array_equal(idx_under, idx_gt)
Expand Down