# Resampling

In [None]:
import ipywidgets as widgets
from imblearn import combine, over_sampling, under_sampling
from IPython.display import display
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from imbalanced.util.plot import Plot

# Plotting helper shared by every section of the notebook.
plot = Plot()

# Single seed passed to the train/test split, the resamplers and the SVC so
# that repeated runs of the notebook produce the same results.
__RANDOM_STATE = 325_235

## Data set

In [None]:
# Slider (1..1000, initially 530) whose current value seeds the synthetic
# data set generated in the next cell.
data_random_state = widgets.IntSlider(value=530, min=1, max=1000)
display(data_random_state)

In [None]:
# Settings for a small two-feature classification problem in which about 90%
# of the samples are assigned to the first class. The seed is read from the
# slider at the moment this cell runs.
dataset_options = {
    'n_samples': 300,
    'n_features': 2,
    'n_informative': 2,
    'n_redundant': 0,
    'n_clusters_per_class': 2,
    'class_sep': 0.8,
    'weights': [0.9],
    'flip_y': 0,
    'random_state': data_random_state.value,
}
x, y = make_classification(**dataset_options)

plot.features_distribution(x, y)

## Evaluation

In [None]:
# Hold out 20% of the samples for evaluation. The split is stratified on the
# labels because only about 10% of the samples belong to the minority class:
# a purely random split could leave the small test set with very few minority
# samples and make the macro F1 score unreliable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=__RANDOM_STATE,
)

# Classifier and metric reused by the sections below.
svc = SVC(C=1.0, random_state=__RANDOM_STATE)
# Macro averaging gives both classes equal weight regardless of their support.
f1_scorer = make_scorer(f1_score, average='macro')

## Imbalanced

In [None]:
# Baseline: train on the untouched training split (`fit` returns the
# estimator itself, so `baseline_model` is the same object as `svc`).
baseline_model = svc.fit(x_train, y_train)
plot.decision_boundary(x, y, baseline_model)

# Macro F1 on the held-out test split.
f1_imbalanced = f1_scorer(baseline_model, x_test, y_test)
print(f'F1 score: {f1_imbalanced}')

## Random Over-Sampling

In [None]:
# Balance the training split by randomly duplicating minority-class samples.
# Only the training data is resampled; the test split stays untouched.
resampler = over_sampling.RandomOverSampler(random_state=__RANDOM_STATE)
x_resampled, y_resampled = resampler.fit_resample(x_train, y_train)

svc.fit(x_resampled, y_resampled)
plot.decision_boundary(x, y, svc)

# Stored under its own name so the baseline score in `f1_imbalanced` is not
# overwritten and the two can still be compared afterwards.
f1_random_over = f1_scorer(svc, x_test, y_test)
print(f'F1 score: {f1_random_over}')

## SMOTE
[docs](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.over_sampling.SMOTE.html)

In [None]:
# Balance the training split with SMOTE, which adds synthetic minority-class
# samples instead of duplicating existing ones. The test split stays untouched.
smote = over_sampling.SMOTE(random_state=__RANDOM_STATE)
x_resampled, y_resampled = smote.fit_resample(x_train, y_train)

svc.fit(x_resampled, y_resampled)
plot.decision_boundary(x, y, svc)

# Stored under its own name so the baseline score in `f1_imbalanced` is not
# overwritten and the two can still be compared afterwards.
f1_smote = f1_scorer(svc, x_test, y_test)
print(f'F1 score: {f1_smote}')

## Tomek-Links
[docs](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.under_sampling.TomekLinks.html)

In [None]:
# Tomek-links cleaning is applied on top of SMOTE-balanced training data.
# The SMOTE step is recomputed here (same seed, hence the same result as a
# top-to-bottom run) rather than reading `x_resampled`/`y_resampled` left
# behind by whichever resampling cell happened to run last.
x_oversampled, y_oversampled = over_sampling.SMOTE(
    random_state=__RANDOM_STATE,
).fit_resample(x_train, y_train)

tlinks = under_sampling.TomekLinks()
x_resampled, y_resampled = tlinks.fit_resample(x_oversampled, y_oversampled)

svc.fit(x_resampled, y_resampled)
plot.decision_boundary(x, y, svc)

# Stored under its own name so the baseline score in `f1_imbalanced` is not
# overwritten and the two can still be compared afterwards.
f1_smote_tlinks = f1_scorer(svc, x_test, y_test)
print(f'F1 score: {f1_smote_tlinks}')

## Pipeline

[docs](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.pipeline.Pipeline.html)

In [None]:
from imblearn.pipeline import Pipeline


# Chain SMOTE over-sampling, Tomek-links cleaning and the SVC into a single
# estimator so that resampling and training happen in one `fit` call.
resampling_steps = [
    ('smote', over_sampling.SMOTE(random_state=__RANDOM_STATE)),
    ('tlinks', under_sampling.TomekLinks()),
    ('svc', SVC(C=1.0, random_state=__RANDOM_STATE)),
]
pipeline = Pipeline(steps=resampling_steps)

pipeline.fit(x_train, y_train)
plot.decision_boundary(x, y, pipeline)

# Macro F1 of the whole pipeline on the held-out test split.
f1_pipeline = f1_scorer(pipeline, x_test, y_test)
print(f'F1 score: {f1_pipeline}')