In [36]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score

import pygad

In [37]:
# Load the pre-split feature matrices and label vectors from the working directory.
X_train, X_test = np.load('X_train.npy'), np.load('X_test.npy')
y_train, y_test = np.load('y_train.npy'), np.load('y_test.npy')

# Классический генетический алгоритм

Оптимизация гиперпараметров с помощью PyGAD

### KNN

In [38]:
from sklearn.neighbors import KNeighborsClassifier

In [39]:
from tqdm import tqdm

In [40]:
# GA budget: number of generations to evolve, and individuals per population.
num_generations, sol_per_pop = 20, 10

In [41]:
# Nominal evaluation budget (population size x generations). The number of
# fitness calls PyGAD actually makes may differ from this figure, so the bar
# is an approximation of progress rather than an exact counter.
total_evals = sol_per_pop * num_generations
pbar = tqdm(desc="GA evaluations", total=total_evals)

GA evaluations:   0%|          | 1/200 [00:11<36:29, 11.00s/it]


In [42]:
# Scores already computed, keyed by the decoded integer pair (n_neighbors, p).
# Re-running this cell resets the cache.
_fitness_cache = {}


def fitness_func(ga_instance, solution, solution_idx):
    """Fitness for the GA: mean 3-fold macro-F1 of a KNN classifier on the training set.

    Parameters
    ----------
    ga_instance
        The GA object invoking this function (not used here).
    solution
        Sequence of two numbers. ``solution[0]`` is truncated with ``int()`` to
        ``n_neighbors`` and ``solution[1]`` to ``p`` of ``KNeighborsClassifier``.
    solution_idx
        Index of the solution inside the population (not used here).

    Returns
    -------
    The mean of the ``f1_macro`` scores over the 3 cross-validation folds.

    Notes
    -----
    The genes are truncated to integers, so different chromosomes frequently
    decode to the same ``(n_neighbors, p)`` pair. The score for a pair is
    computed once and reused: with an integer ``cv`` the folds are not
    shuffled, so recomputing would return the same value anyway.
    """
    params = (int(solution[0]), int(solution[1]))

    if params not in _fitness_cache:
        n_neighbors, p = params
        model = KNeighborsClassifier(n_neighbors=n_neighbors, p=p, n_jobs=-1)
        # NOTE(review): both the estimator and cross_val_score request
        # n_jobs=-1; confirm this nesting does not oversubscribe the CPU.
        _fitness_cache[params] = cross_val_score(
            model,
            X_train,
            y_train,
            cv=3,
            scoring='f1_macro',
            n_jobs=-1
        ).mean()

    # Tick once per fitness call, after the work is done, so the bar counts
    # finished evaluations rather than started ones.
    pbar.update(1)
    return _fitness_cache[params]

In [43]:
# Search range per gene, in the order fitness_func reads them.
# NOTE(review): fitness_func truncates genes with int(); if PyGAD samples these
# ranges as floats with an exclusive upper bound, the 'high' values themselves
# (100 and 10) are never tried - confirm against the PyGAD docs.
gene_space = [
    dict(low=1, high=100),  # gene 0 -> n_neighbors
    dict(low=1, high=10),   # gene 1 -> p
]

In [44]:
# Configure the genetic algorithm that searches the KNN hyperparameters.
ga_instance = pygad.GA(
    # Problem definition: two genes (n_neighbors, p) scored by fitness_func.
    fitness_func=fitness_func,
    num_genes=2,
    gene_space=gene_space,
    # Evolution budget.
    num_generations=num_generations,
    sol_per_pop=sol_per_pop,
    # Selection: rank-based, 5 parents per generation.
    # NOTE(review): PyGAD's default elitism setting may make keep_parents
    # ineffective - confirm which of the two is intended.
    parent_selection_type="rank",
    num_parents_mating=5,
    keep_parents=2,
    # Mutation: random, applied to 50% of the genes.
    mutation_type="random",
    mutation_percent_genes=50,
    # Fixed seed for a repeatable search.
    random_seed=42,
)

In [45]:
# Run the search. The progress bar is always closed, even if the long run is
# interrupted or raises, so it does not linger half-finished in the output.
try:
    ga_instance.run()
finally:
    pbar.close()

# Hand best_solution() the fitness values stored for the final population so
# it does not call fitness_func again for every individual.
solution, solution_fitness, solution_idx = ga_instance.best_solution(
    pop_fitness=ga_instance.last_generation_fitness
)
print(f"Лучшие гиперпараметры: n_neighbors={int(solution[0])}, p={int(solution[1])}")
print(f"Лучшее значение F1-macro: {solution_fitness:.4f}")

GA evaluations:  98%|█████████▊| 195/200 [2:04:23<02:13, 26.61s/it]  

Лучшие гиперпараметры: n_neighbors=2, p=7
Лучшее значение F1-macro: 0.9943
