In [30]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score

import pygad

In [31]:
# Open the .npz archive and bind the arrays stored under the four keys below
# to module-level names of the same spelling.
loaded_data = np.load('preprocessed_data.npz')
X_train, X_test, y_train, y_test = (
    loaded_data[key] for key in ('X_train', 'X_test', 'y_train', 'y_test')
)

# Классический генетический алгоритм

Оптимизация гиперпараметров с помощью PyGAD

### KNN

In [32]:
from sklearn.neighbors import KNeighborsClassifier

In [33]:
def fitness_func(ga_instance, solution, solution_idx):
    """Fitness callback for PyGAD: score one candidate pair of KNN settings.

    The first two genes of ``solution`` are truncated with ``int()`` and used
    as the ``n_neighbors`` and ``p`` arguments of ``KNeighborsClassifier``.
    The classifier is evaluated with 3-fold cross-validation on the
    module-level ``X_train`` / ``y_train`` using the ``f1_macro`` scorer.

    Parameters
    ----------
    ga_instance
        Not used in the body; present because the callback takes three
        positional arguments.
    solution
        Indexable candidate; only positions 0 and 1 are read.
    solution_idx
        Not used in the body.

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``.
    """
    n_neighbors, p = int(solution[0]), int(solution[1])
    knn = KNeighborsClassifier(n_neighbors=n_neighbors, p=p, n_jobs=-1)

    fold_scores = cross_val_score(knn, X_train, y_train, cv=3, scoring='f1_macro')
    return fold_scores.mean()

In [34]:
# One range per gene, listed in the order fitness_func indexes them.
gene_space = [
    dict(low=1, high=100),  # gene 0 -> n_neighbors
    dict(low=1, high=10),   # gene 1 -> p
]

In [35]:
# Configure the genetic search over the two genes described by gene_space.
ga_instance = pygad.GA(
    num_generations=20,
    sol_per_pop=10,
    num_parents_mating=5,
    num_genes=2,
    fitness_func=fitness_func,
    gene_space=gene_space,
    # fitness_func truncates both genes with int(); storing integer genes keeps
    # the reported solution equal to the values actually passed to the model.
    gene_type=int,
    parent_selection_type="rank",
    # PyGAD only honours keep_parents when keep_elitism is 0 (its default is 1),
    # so elitism is switched off to let keep_parents=2 take effect.
    keep_elitism=0,
    keep_parents=2,
    mutation_type="random",
    mutation_percent_genes=50,
    # Fixed seed so that re-running the notebook repeats the same search.
    random_seed=42,
)

In [None]:
# Run the genetic search configured in ga_instance.
ga_instance.run()

# best_solution() returns the best genes, their fitness and their index.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
# The genes are the KNN n_neighbors and p values read by fitness_func, and the
# fitness is a mean cross-validated f1_macro score, so the labels say exactly
# that (they previously named n_estimators / max_depth and "accuracy").
print(f"Лучшие гиперпараметры: n_neighbors={int(solution[0])}, p={int(solution[1])}")
print(f"F1-macro (кросс-валидация): {solution_fitness}")