# Оптимизация гиперпараметров

In [None]:
import pickle

import numpy as np
import pandas as pd
import pygad
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Load the dataset into a DataFrame.
# NOTE(review): the extension is '.cvs', not '.csv' -- possibly a typo;
# confirm the file's actual name on disk before changing it.
df = pd.read_csv('out_1000_10.cvs')


In [None]:
# Estimator instances: a random forest with default settings and a logistic
# regression allowed up to 1000 solver iterations.
Model1 = RandomForestClassifier()
Model2 = LogisticRegression(max_iter=1000)

# "collision" is the target column; every other column is used as a feature.
y = df["collision"]
X = df.drop(columns="collision")


def fitness_func(ga_instance, solution, solution_idx):
    """Score one GA chromosome by cross-validated performance.

    The chromosome is decoded into estimator keyword arguments, applied to a
    fresh copy of ``Model1`` and evaluated with 5-fold cross-validation on
    the module-level ``X`` / ``y``.

    Args:
        ga_instance: The running ``pygad.GA`` object (unused here).
        solution: Sequence of gene values for this candidate.
        solution_idx: Index of the candidate in the population (unused here).

    Returns:
        Mean of the five cross-validation scores; higher is better.
    """
    params = decode_solution(solution)

    # Model1 is an estimator *instance*, so calling it like a class raises
    # TypeError. Clone it to get an unfitted copy, then apply the candidate
    # hyperparameters.
    model = clone(Model1).set_params(**params)

    scores = cross_val_score(model, X, y, cv=5)
    return np.mean(scores)

def decode_solution(solution):
    """Translate a GA chromosome into random-forest keyword arguments.

    Genes are read positionally:

    0. ``n_estimators``      -- truncated to int, at least 1.
    1. ``max_depth``         -- truncated to int; values <= 1 mean ``None``.
    2. ``min_samples_split`` -- truncated to int, at least 2.
    3. ``min_samples_leaf``  -- truncated to int, at least 1.
    4. ``max_features``      -- index (mod 3) into ``('sqrt', 'log2', None)``.

    The lower bounds are enforced here because scikit-learn rejects smaller
    integer values for these parameters, and a mutated or truncated gene
    could otherwise make the whole fitness evaluation fail.

    Args:
        solution: Sequence of at least five numeric gene values.

    Returns:
        dict of keyword arguments for the estimator.

    Raises:
        IndexError: If ``solution`` holds fewer than five genes.
    """
    max_features_options = ('sqrt', 'log2', None)

    depth_gene = solution[1]
    max_depth = int(depth_gene) if depth_gene > 1 else None

    return {
        'n_estimators': max(1, int(solution[0])),
        'max_depth': max_depth,
        'min_samples_split': max(2, int(solution[2])),
        'min_samples_leaf': max(1, int(solution[3])),
        'max_features': max_features_options[int(solution[4] % 3)],
    }

# One entry per gene, in the order decode_solution() reads them. The list must
# have five entries: decode_solution() indexes solution[0] .. solution[4], and
# num_genes is derived from len(gene_space).
gene_space = [
    {'low': 10, 'high': 100},  # n_estimators
    {'low': 0, 'high': 30},    # max_depth; values <= 1 decode to None
    {'low': 2, 'high': 20},    # min_samples_split (ints below 2 are invalid)
    {'low': 1, 'high': 10},    # min_samples_leaf (ints below 1 are invalid)
    {'low': 0, 'high': 3},     # max_features index into sqrt / log2 / None
]

# Configure the genetic algorithm that searches the hyperparameter space.
ga_instance = pygad.GA(
    # Objective to maximise.
    fitness_func=fitness_func,
    # Chromosome layout: one gene per gene_space entry.
    gene_space=gene_space,
    num_genes=len(gene_space),
    # Population size, number of parents per generation, and run length.
    sol_per_pop=10,
    num_parents_mating=4,
    num_generations=50,
    # Adaptive mutation takes a pair of probabilities
    # (NOTE(review): per the PyGAD docs, the first applies to below-average
    # solutions and the second to above-average ones -- confirm).
    mutation_type="adaptive",
    mutation_probability=[0.25, 0.1],
)

# Run the evolutionary search.
ga_instance.run()

# Take the best chromosome found and its fitness; the third returned value is
# not needed here.
best_solution, best_fitness, _ = ga_instance.best_solution()
best_params = decode_solution(best_solution)
print(f"Best CV score: {best_fitness:.4f} with parameters: {best_params}")

# Model1 is an estimator instance, so calling it like a class raises
# TypeError. Clone it and apply the winning hyperparameters, then refit on the
# full dataset.
best_model = clone(Model1).set_params(**best_params)
best_model.fit(X, y)

# save_model() is neither defined nor imported in this file, so persist the
# fitted estimator with the standard-library pickle module instead.
with open('ga_optimized_model1.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)