In [1]:
import numpy as np
import pandas as pd
from imblearn.combine import SPIDER

In [2]:
# Toy 2-D dataset written as one (x, y, class) triple per sample, in
# ascending x order. Class 1 is the minority class (6 of the 20 samples).
# The leading number in each trailing comment is the sample's row index;
# the text after it is the author's note on how that sample is expected
# to be treated by the resampler.
_samples = [
    ( 2.72, 2.97, 0),  #  0
    ( 3.06, 4.29, 0),  #  1
    ( 3.34, 1.67, 0),  #  2
    ( 4.00, 5.77, 0),  #  3
    ( 4.48, 0.39, 0),  #  4
    ( 5.00, 1.45, 1),  #  5: noisy minority -- amplify 3
    ( 5.64, 3.89, 0),  #  6
    ( 6.14, 2.77, 0),  #  7
    ( 6.78, 3.81, 0),  #  8
    ( 7.20, 2.93, 0),  #  9
    ( 7.92, 1.35, 0),  # 10
    ( 9.02, 3.51, 1),  # 11: noisy minority -- amplify 1
    (10.10, 4.29, 0),  # 12: noisy majority -- remove / relabel
    (10.58, 2.71, 1),  # 13
    (12.40, 3.03, 1),  # 14
    (12.84, 1.33, 1),  # 15
    (13.56, 4.23, 0),  # 16: noisy majority -- remove / relabel
    (13.68, 2.27, 0),  # 17: noisy majority -- remove / relabel
    (15.10, 4.25, 1),  # 18: noisy minority -- amplify 0 (no safe majority in neighborhood)
    (15.88, 1.15, 0),  # 19: noisy majority -- remove
]

# Split the triples into the feature matrix (20 x 2) and the label vector (20,).
X = np.array([(px, py) for px, py, _ in _samples])
y = np.array([label for _, _, label in _samples])
del _samples  # keep the notebook namespace limited to X and y

In [3]:
# Assemble a labelled frame (columns 'x', 'y', 'class') for display.
#
# X and y are rebound further down in this cell (to a DataFrame and a Series),
# so the inputs are coerced with np.asarray first. That keeps the cell
# re-runnable: the previous `y[:, np.newaxis]` form is a multi-dimensional
# index on a Series on the second run, which pandas warns about or rejects
# depending on its version.
df = pd.DataFrame(np.asarray(X), columns=['x', 'y'])
df['class'] = np.asarray(y).astype(int)

# From here on X is a two-column DataFrame and y is the 'class' Series.
X = df.drop(columns=['class'])
y = df['class']

df

Unnamed: 0,x,y,class
0,2.72,2.97,0
1,3.06,4.29,0
2,3.34,1.67,0
3,4.0,5.77,0
4,4.48,0.39,0
5,5.0,1.45,1
6,5.64,3.89,0
7,6.14,2.77,0
8,6.78,3.81,0
9,7.2,2.93,0


In [14]:
# NOTE: X_r is created by the "Relabel" cell (SPIDER(kind='relabel')) that
# appears further down in this notebook, so this cell only works once that
# cell has been executed -- run in document order on a fresh kernel it fails
# with a NameError.
#
# Row order that sorts the resampled points by their first column.
sort_idxs = X_r[:, 0].argsort()
X_r[sort_idxs]

array([[ 2.72,  2.97],
       [ 3.06,  4.29],
       [ 3.34,  1.67],
       [ 4.  ,  5.77],
       [ 4.48,  0.39],
       [ 5.  ,  1.45],
       [ 5.  ,  1.45],
       [ 5.  ,  1.45],
       [ 5.  ,  1.45],
       [ 5.64,  3.89],
       [ 6.14,  2.77],
       [ 6.78,  3.81],
       [ 7.2 ,  2.93],
       [ 7.92,  1.35],
       [ 9.02,  3.51],
       [ 9.02,  3.51],
       [10.1 ,  4.29],
       [10.58,  2.71],
       [12.4 ,  3.03],
       [12.84,  1.33],
       [13.56,  4.23],
       [13.68,  2.27],
       [15.1 ,  4.25]])

## Weak

In [4]:
# Resample the toy data with the 'weak' SPIDER variant.
weak = SPIDER(kind='weak')

X_w, y_w = weak.fit_resample(X, y)

# Build the display frame through np.asarray so it works whether the sampler
# hands back plain arrays or pandas objects (X and y are passed in as a
# DataFrame / Series at this point).
df_w = pd.DataFrame(np.asarray(X_w), columns=['x', 'y'])
df_w['class'] = np.asarray(y_w)

# The resampled data contains exact duplicates of some points; a stable sort
# keeps those tied rows in index order instead of an arbitrary one.
df_w.sort_values('x', kind='mergesort')

Unnamed: 0,x,y,class
4,2.72,2.97,0
5,3.06,4.29,0
6,3.34,1.67,0
7,4.0,5.77,0
8,4.48,0.39,0
0,5.0,1.45,1
9,5.0,1.45,1
2,5.0,1.45,1
1,5.0,1.45,1
10,5.64,3.89,0


## Relabel

In [5]:
# Resample the toy data with the 'relabel' SPIDER variant.
relabel = SPIDER(kind='relabel')

X_r, y_r = relabel.fit_resample(X, y)

# Build the display frame through np.asarray so it works whether the sampler
# hands back plain arrays or pandas objects (X and y are passed in as a
# DataFrame / Series at this point).
df_r = pd.DataFrame(np.asarray(X_r), columns=['x', 'y'])
df_r['class'] = np.asarray(y_r)

# The resampled data contains exact duplicates of some points; a stable sort
# keeps those tied rows in index order instead of an arbitrary one.
df_r.sort_values('x', kind='mergesort')

Unnamed: 0,x,y,class
4,2.72,2.97,0
5,3.06,4.29,0
6,3.34,1.67,0
7,4.0,5.77,0
8,4.48,0.39,0
0,5.0,1.45,1
1,5.0,1.45,1
2,5.0,1.45,1
9,5.0,1.45,1
10,5.64,3.89,0
