In [1]:
from scipy.io import loadmat
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

In [2]:
# Data loading and preparation helper
def load_and_prepare_data(image_path, mask_path, sample_size=None, random_state=None):
    """Load an image array and its label mask from .mat files as flat per-pixel arrays.

    The image is read from the ``'image'`` variable of ``image_path`` and the
    mask from the ``'img'`` variable of ``mask_path``. The image is reshaped to
    ``(shape[0] * shape[1], shape[2])`` and the mask is flattened, so row ``k``
    of the first result pairs with element ``k`` of the second.
    (Assumes the image is a 3-D ``(rows, cols, channels)`` array — TODO confirm.)

    Parameters
    ----------
    image_path : str
        Path to the .mat file holding the ``'image'`` variable.
    mask_path : str
        Path to the .mat file holding the ``'img'`` variable.
    sample_size : int or None, optional
        Number of pixels to draw at random without replacement. Any falsy
        value (``None`` or ``0``) returns every pixel.
    random_state : int or None, optional
        Seed for the pixel subsample. With ``None`` (the default) the draw uses
        NumPy's global RNG, exactly as before, so ``np.random.seed`` still
        controls it. With a seed the draw is reproducible per call and does
        not touch the global RNG state.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(image_data_flat, mask_data_flat)``.

    Raises
    ------
    KeyError
        If a file lacks the expected variable.
    ValueError
        If the mask's element count differs from the image's pixel count, or
        (raised by NumPy) if ``sample_size`` exceeds the number of pixels.
    """
    image_data = loadmat(image_path)['image']
    mask_data = loadmat(mask_path)['img']

    n_pixels = image_data.shape[0] * image_data.shape[1]
    # A mismatched mask would otherwise pair pixels with the wrong labels.
    if mask_data.size != n_pixels:
        raise ValueError(
            f"mask has {mask_data.size} elements but image has {n_pixels} pixels"
        )

    image_data_flat = image_data.reshape(-1, image_data.shape[2])
    mask_data_flat = mask_data.flatten()

    if sample_size:
        if random_state is None:
            # Legacy path: global RNG, identical to the previous behaviour.
            indices = np.random.choice(n_pixels, sample_size, replace=False)
        else:
            rng = np.random.default_rng(random_state)
            indices = rng.choice(n_pixels, size=sample_size, replace=False)
        # The same indices are applied to both arrays to keep pixel/label pairs aligned.
        image_data_flat = image_data_flat[indices]
        mask_data_flat = mask_data_flat[indices]

    return image_data_flat, mask_data_flat

In [3]:
# Load the data
# load_and_prepare_data draws its random pixel subsample from NumPy's global
# RNG, so seed it here; otherwise every Restart & Run All samples different
# pixels and the reported accuracy cannot be reproduced.
np.random.seed(42)
sample_size = 100000  # reduced sample size: pixels drawn from each image
triple_coffee_data, triple_coffee_mask = load_and_prepare_data('./triple/triple_coffee.mat',
                                                               './triple/triple_coffee_mask.mat', sample_size)
double_coffee_data, double_coffee_mask = load_and_prepare_data('./double/double_coffee.mat',
                                                               './double/double_coffee_gt.mat', sample_size)

In [4]:
# Data preprocessing
# Min-max scale the features, then project them onto 200 principal components.
# Both transformers are fitted on the triple-coffee sample only (the data the
# classifier is later fitted on) and applied unchanged to the double-coffee
# sample, so no statistics from the evaluation data leak into the fit.
scaler = MinMaxScaler()
# random_state keeps the projection reproducible if PCA's default 'auto'
# solver policy selects the randomized SVD solver for this data shape.
pca = PCA(n_components=200, random_state=42)

triple_coffee_data_scaled = scaler.fit_transform(triple_coffee_data)
triple_coffee_data_pca = pca.fit_transform(triple_coffee_data_scaled)

double_coffee_data_scaled = scaler.transform(double_coffee_data)
double_coffee_data_pca = pca.transform(double_coffee_data_scaled)

In [5]:
# Balance the classes of the triple-coffee sample with SMOTE oversampling.
# Only this sample is resampled; the double-coffee data is left as loaded.
oversampler = SMOTE(random_state=42)
triple_coffee_data_res, triple_coffee_mask_res = oversampler.fit_resample(
    triple_coffee_data_pca,
    triple_coffee_mask,
)

In [6]:
# Split the double-coffee sample into train and test parts (80% / 20%).
# In the cells visible here only X_test / y_test are used, for evaluation;
# the classifier itself is fitted on the resampled triple-coffee data.
X_train, X_test, y_train, y_test = train_test_split(
    double_coffee_data_pca,
    double_coffee_mask,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Fit a KNeighborsClassifier with fixed hyperparameters on the
# SMOTE-balanced triple-coffee data.
best_knn = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',      # closer neighbours get a larger vote
    algorithm='ball_tree',
)
best_knn.fit(triple_coffee_data_res, triple_coffee_mask_res)

In [8]:
# Evaluate the model: predict labels for the held-out double-coffee pixels
# and report the fraction classified correctly.
y_pred_knn = best_knn.predict(X_test)
accuracy_knn = accuracy_score(
    y_test,
    y_pred_knn,
)
print(f'Улучшенная точность KNN: {accuracy_knn}')

Улучшенная точность KNN: 0.7835
