In [11]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import pyswarms as ps

In [12]:
X_train = np.load('X_train.npy')
X_test  = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test  = np.load('y_test.npy')

# Алгоритм роя частиц

## KNN

In [13]:
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

In [14]:
def objective_knn(x):
    n_particles = x.shape[0]
    scores = np.zeros(n_particles)
    for i in range(n_particles):
        n_neighbors = int(np.round(x[i, 0]))
        n_neighbors = max(1, n_neighbors)
        p = int(np.round(x[i, 1]))
        p = 1 if p < 1 else 2 if p > 2 else p

        model = KNeighborsClassifier(n_neighbors=n_neighbors, p=p, n_jobs=-1)
        score = cross_val_score(
            model,
            X_train,
            y_train,
            cv=cv,
            scoring='f1_macro',
            n_jobs=-1
        ).mean()
        scores[i] = -score  
    return scores

In [15]:
# Параметры PSO
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
# Границы: n_neighbors [1,50], p [1,10]
bounds_knn = (np.array([1, 1]), np.array([50, 10]))

In [16]:
optimizer_knn = ps.single.GlobalBestPSO(
    n_particles=20,
    dimensions=2,
    options=options,
    bounds=bounds_knn
)

In [17]:
best_cost_knn, best_pos_knn = optimizer_knn.optimize(objective_knn, iters=1)

2025-04-11 20:07:57,345 - pyswarms.single.global_best - INFO - Optimize for 1 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|██████████|1/1, best_cost=-0.994
2025-04-11 20:15:23,508 - pyswarms.single.global_best - INFO - Optimization finished | best cost: -0.9942574611712836, best pos: [1.40784775 3.17412922]


In [18]:
best_n_neighbors = int(np.round(best_pos_knn[0]))
best_p = int(np.round(best_pos_knn[1]))
print("Best KNN params: n_neighbors=", best_n_neighbors, ", p=", best_p)
print("Best KNN f1_macro=", -best_cost_knn)

Best KNN params: n_neighbors= 1 , p= 3
Best KNN f1_macro= 0.9942574611712836


### Лучшие значения

Best KNN params: n_neighbors= 1 , p= 3

Best KNN f1_macro= 0.9942574611712836


## SVC (отброшен ввиду долгих вычислений)

In [23]:
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

In [24]:
def objective_svc(x):
    n_particles = x.shape[0]
    scores = np.zeros(n_particles)
    for i in range(n_particles):
        C = 10 ** x[i, 0]
        gamma = 10 ** x[i, 1]

        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('svc', SVC(C=C, gamma=gamma, kernel='rbf', random_state=42))
        ])
        score = cross_val_score(
            pipeline,
            X_train,
            y_train,
            cv=cv,
            scoring='f1_macro',
            n_jobs=-1
        ).mean()
        scores[i] = -score
    return scores

In [25]:
# Границы: log10(C) [-3,3], log10(gamma) [-5,2]
bounds_svc = (np.array([-3, -5]), np.array([3, 2]))

optimizer_svc = ps.single.GlobalBestPSO(
    n_particles=20,
    dimensions=2,
    options=options,
    bounds=bounds_svc
)

In [None]:
best_cost_svc, best_pos_svc = optimizer_svc.optimize(objective_svc, iters=1)

In [None]:
best_C = 10 ** best_pos_svc[0]
best_gamma = 10 ** best_pos_svc[1]
print("Best SVC params: C=", best_C, ", gamma=", best_gamma)
print("Best SVC f1_macro=", -best_cost_svc)

## DCT

In [None]:
from sklearn.tree import DecisionTreeClassifier as DTC

In [None]:
from pyswarms.single import GlobalBestPSO

# Определяем пространство поиска и границы
lower_bounds = [0, 0, 0.5, 1.5, 0.5, 0]
upper_bounds = [2.999, 1.999, 15.499, 20.499, 20.499, 2.999]
bounds = (np.array(lower_bounds), np.array(upper_bounds))

def decode_pso_position(position):
    """Преобразуем непрерывные значения частицы в параметры дерева"""
    criterion_idx = int(round(position[0]))
    criterion_idx = max(0, min(criterion_idx, 2))
    
    splitter_idx = int(round(position[1]))
    splitter_idx = max(0, min(splitter_idx, 1))
    
    max_depth = int(round(position[2]))
    max_depth = max(1, min(max_depth, 15))
    
    min_samples_split = int(round(position[3]))
    min_samples_split = max(2, min(min_samples_split, 20))
    
    min_samples_leaf = int(round(position[4]))
    min_samples_leaf = max(1, min(min_samples_leaf, 20))
    
    max_features_idx = int(round(position[5]))
    max_features_idx = max(0, min(max_features_idx, 2))

    return {
        'criterion': ["gini", "entropy", "log_loss"][criterion_idx],
        'splitter': ["best", "random"][splitter_idx],
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': ["sqrt", "log2", None][max_features_idx],
        'random_state': 42
    }

def objective_function(particles):
    """Функция оценки качества частиц"""
    scores = []
    for particle in particles:
        try:
            model = DTC(**decode_pso_position(particle))
            score = cross_val_score(model, X_train, y_train, 
                                 cv=5, scoring="f1_macro").mean()
            scores.append(-score)  # Инвертируем для минимизации
        except:
            scores.append(1)  # При ошибке назначаем плохой результат
    return np.array(scores)

# Настройка и запуск PSO
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = GlobalBestPSO(n_particles=20, 
                        dimensions=6, 
                        options=options, 
                        bounds=bounds)

best_cost, best_pos = optimizer.optimize(objective_function, iters=30)

# Получаем и выводим результаты
best_params = decode_pso_position(best_pos)
print(f"Лучшая точность: {-best_cost:.4f}")  # Инвертируем обратно
print("Лучшие гиперпараметры:", best_params)

2025-04-14 19:42:59,523 - pyswarms.single.global_best - INFO - Optimize for 30 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|██████████|30/30, best_cost=-0.981
2025-04-14 20:05:41,618 - pyswarms.single.global_best - INFO - Optimization finished | best cost: -0.9811641417873908, best pos: [ 2.43433038  0.39659199 14.6325186  13.29556746  8.98261408  2.15295213]


Лучшая точность: 0.9812
Лучшие гиперпараметры: {'criterion': 'log_loss', 'splitter': 'best', 'max_depth': 15, 'min_samples_split': 13, 'min_samples_leaf': 9, 'max_features': None, 'random_state': 42}


### Лучшие значения
Лучшая точность: 0.9812

Лучшие гиперпараметры: 
{'criterion': 'log_loss', 'splitter': 'best', 'max_depth': 15, 'min_samples_split': 13, 'min_samples_leaf': 9, 'max_features': None, 'random_state': 42}
