In [4]:
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [2]:
# return_X_y=True makes load_iris hand back the (data, target) pair directly
# instead of a Bunch object; unpack it into the feature matrix and the labels.
iris_data_and_target = load_iris(return_X_y=True)
X, y = iris_data_and_target

In [3]:
# Hold out 15% of the samples as a test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [7]:
class OneSidedSelection:
    """Under-sample the larger classes with a nearest-neighbour agreement filter.

    The minority class (the least frequent label; ties go to the label that
    appears first in ``y``) is kept untouched.  For every other class a sample
    is kept only if all of its ``k_neighbors`` nearest neighbours in the WHOLE
    data set -- the sample itself excluded -- carry the same label.  If more
    samples survive that filter than the minority class contains, a random
    subset of minority-class size is drawn without replacement.

    A class whose samples are all rejected by the filter contributes no rows,
    so it can end up smaller than the minority class or vanish entirely.

    NOTE(review): despite the name, this is a neighbour-agreement filter
    followed by random under-sampling, not the condensed-NN / Tomek-link
    procedure usually published as "one-sided selection" -- confirm which
    behaviour is intended.

    Parameters
    ----------
    k_neighbors : int, default=1
        Number of nearest neighbours (not counting the sample itself) that
        must share a sample's label for the sample to be kept.
    random_state : None, int, numpy RandomState or numpy Generator, default=None
        Source of randomness for the final subsampling step.  ``None`` uses
        NumPy's global ``np.random`` state, so ``np.random.seed`` still
        controls the result; an int seeds a private ``RandomState``; an
        object exposing ``choice`` is used as-is.
    """

    def __init__(self, k_neighbors=1, random_state=None):
        self.k_neighbors = k_neighbors
        self.random_state = random_state

    def _random_source(self):
        """Return the object whose ``choice`` method performs the subsampling."""
        seed = self.random_state
        if seed is None:
            # Global legacy state: keeps callers that rely on np.random.seed working.
            return np.random
        if hasattr(seed, "choice"):
            return seed
        return np.random.RandomState(seed)

    def _neighbours_agree(self, X, y):
        """Return a boolean mask, True where all k nearest other samples share the label."""
        knn = KNeighborsClassifier(n_neighbors=self.k_neighbors)
        # Fit on the full data set so the returned indices address X / y directly.
        knn.fit(X, y)
        # Without query points, kneighbors() reports the neighbours of every
        # fitted sample and does not count a sample as its own neighbour.
        neighbour_idx = knn.kneighbors(return_distance=False)
        return np.all(y[neighbour_idx] == y[:, None], axis=1)

    def fit_resample(self, X, y):
        """Return the under-sampled ``(X_resampled, y_resampled)`` pair.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        X_resampled : ndarray
            Selected rows of ``X``, grouped by class in order of each class's
            first appearance in ``y``.
        y_resampled : ndarray
            Labels of the selected rows; always the same length as
            ``X_resampled``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` differ in length or are empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of samples "
                f"(got {X.shape[0]} and {y.shape[0]})."
            )
        if y.shape[0] == 0:
            raise ValueError("Cannot resample an empty data set.")

        class_counts = Counter(y.tolist())
        minority_class = min(class_counts, key=class_counts.get)
        minority_class_size = class_counts[minority_class]

        # With a single class there is nothing to under-sample.
        if len(class_counts) == 1:
            return X.copy(), y.copy()

        keeps_label = self._neighbours_agree(X, y)
        rng = self._random_source()

        # Collect row indices rather than rows, so X and y are sliced with the
        # same index array and can never get out of step.
        kept_rows = []
        for class_label in class_counts:
            class_rows = np.flatnonzero(y == class_label)
            if class_label != minority_class:
                class_rows = class_rows[keeps_label[class_rows]]
                if class_rows.size > minority_class_size:
                    chosen = rng.choice(class_rows, size=minority_class_size, replace=False)
                    # Sort so the kept samples stay in their original relative order.
                    class_rows = np.sort(chosen)
            kept_rows.append(class_rows)

        kept_rows = np.concatenate(kept_rows)
        return X[kept_rows], y[kept_rows]

In [9]:
# The under-sampling step draws a random subset via NumPy's global generator;
# seed it so this cell selects the same rows on every Restart & Run All.
np.random.seed(42)
oss = OneSidedSelection(k_neighbors=1)
X_train_resampled, y_train_resampled = oss.fit_resample(X_train, y_train)

In [10]:
# Tally the labels before and after resampling, then show both tallies.
train_label_counts = Counter(y_train)
resampled_label_counts = Counter(y_train_resampled)
print("y_train:", train_label_counts)
print("y_train_resampled:", resampled_label_counts)

y_train: Counter({2: 44, 0: 42, 1: 41})
y_train_resampled: Counter({2: 41, 0: 41, 1: 41})
