In [2]:
import numpy as np
import pandas as pd

from imblearn.datasets import make_imbalance
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, classification_report

from sklearn.datasets import load_wine

In [10]:
wine = load_wine()

data = pd.DataFrame(wine.data, columns=wine.feature_names)
data['target'] = wine.target

X = data.drop('target', axis=1)
y = data['target']

In [11]:
data.target.value_counts(True)

1    0.398876
0    0.331461
2    0.269663
Name: target, dtype: float64

In [12]:
data.target.value_counts().sum()

178

In [14]:
imbalance_ratios = {0:0.6, 1:0.3, 2:0.1}
X, y = make_imbalance(X, y, sampling_strategy=imbalance_ratios)

TypeError: expected a sequence of integers or a single integer, got '0.6'

In [9]:
y.value_counts().sum()

114

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [17]:
pipeline = Pipeline([
    ('smote', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression())
])

In [23]:
scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='f1_weighted')
print(f'cross_val f1 score (weighted): {np.mean(scores)}')

cross_val f1 score (weighted): 0.9562527233115468


In [25]:
pipeline.fit(X_train, y_train)

In [26]:
y_preds = pipeline.predict(X_test)

In [29]:
test_f1 = f1_score(y_test, y_preds, average='weighted')
print(f'Test F1 score: {test_f1}')

Test F1 score: 0.95641277105808


In [31]:
print(classification_report(y_test, y_preds))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.96        23
   macro avg       0.97      0.97      0.97        23
weighted avg       0.96      0.96      0.96        23

