# Optymalizacja hiperparametrów dla RandomForestClassifier

W tym notebooku przetestujemy dwa narzędzia do optymalizacji hiperparametrów:
- **Optuna**
- **Hyperopt**

Porównamy wyniki obu metod na przykładzie klasyfikatora `RandomForestClassifier`.

In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
import optuna
from hyperopt import hp, fmin, tpe, Trials

## Przygotowanie danych

In [3]:
# Load the breast-cancer dataset as pandas objects. With as_frame=True the
# returned frame already holds the feature columns plus a 'target' column, so
# the target does not have to be assigned to the frame a second time.
data = load_breast_cancer(as_frame=True)
df = data.frame

# Separate the feature matrix from the label column.
X = df.drop(columns='target')
y = df['target']

# Hold out 20% of the rows for the final comparison. Stratifying on y keeps the
# class proportions the same in the training and test parts; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Optymalizacja za pomocą Optuna

In [None]:
def objective_optuna(trial):
    """Score one Optuna trial of a random forest on the training split.

    Args:
        trial: Optuna trial object used to sample the four integer
            hyperparameters.

    Returns:
        The negated mean accuracy of a 3-fold cross-validation on
        ``X_train`` / ``y_train`` (negated so that a minimizing study
        prefers higher accuracy).
    """
    # Sample in a fixed order: n_estimators, max_depth, min_samples_split,
    # min_samples_leaf.
    sampled = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 5, 50),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
    }
    forest = RandomForestClassifier(random_state=42, **sampled)

    fold_scores = cross_val_score(
        forest, X_train, y_train, cv=3, scoring="accuracy"
    )
    return -fold_scores.mean()

# The objective returns the negated accuracy, so the study minimizes it.
# The sampler is seeded with the same value used for the models and the data
# split so that repeated runs of this cell explore the same trials.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective_optuna, n_trials=50)

print("Najlepsze parametry Optuna:", study.best_params)
# Flip the sign back to report the accuracy itself.
print("Najlepszy wynik Optuna:", -study.best_value)

## Optymalizacja za pomocą Hyperopt

In [None]:
def objective_hyperopt(params):
    """Score one Hyperopt sample of a random forest on the training split.

    Args:
        params: Mapping with the keys ``n_estimators``, ``max_depth``,
            ``min_samples_split`` and ``min_samples_leaf``. Each value is
            converted with ``int()`` before it is passed to the model.

    Returns:
        The negated mean accuracy of a 3-fold cross-validation on
        ``X_train`` / ``y_train`` (negated because the optimizer minimizes).
    """
    names = ("n_estimators", "max_depth", "min_samples_split", "min_samples_leaf")
    int_params = {name: int(params[name]) for name in names}
    forest = RandomForestClassifier(random_state=42, **int_params)

    fold_scores = cross_val_score(
        forest, X_train, y_train, cv=3, scoring="accuracy"
    )
    return -fold_scores.mean()

# Search space for Hyperopt. All four parameters use a step of 1 so that the
# candidate values are the same integers the Optuna objective samples from
# (50-500, 5-50, 2-20, 1-10); otherwise the two optimizers would be compared on
# different grids.
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 500, 1),
    'max_depth': hp.quniform('max_depth', 5, 50, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 20, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 10, 1),
}

# NOTE(review): this search is not seeded. fmin accepts an `rstate` argument for
# reproducible runs, but the expected generator type depends on the installed
# hyperopt version - confirm before adding it.
trials = Trials()
best_hyperopt = fmin(fn=objective_hyperopt, space=space, algo=tpe.suggest, max_evals=50, trials=trials)

# quniform yields floats (e.g. 12.0); cast them so the reported parameters are
# integers, matching what the model actually receives.
best_hyperopt = {name: int(value) for name, value in best_hyperopt.items()}

print("Najlepsze parametry Hyperopt:", best_hyperopt)
# The objective returns the negated accuracy, so flip the sign of the best loss
# to report the best cross-validated accuracy, as the Optuna cell does.
print("Najlepszy wynik Hyperopt:", -trials.best_trial['result']['loss'])

## Porównanie wyników

In [None]:
# Refit one forest per optimizer on the whole training split, using the best
# hyperparameters each optimizer found, and score it on the held-out test split.
param_names = ("n_estimators", "max_depth", "min_samples_split", "min_samples_leaf")

# Optuna already reports integer values.
optuna_params = {name: study.best_params[name] for name in param_names}
model_optuna = RandomForestClassifier(random_state=42, **optuna_params)
accuracy_optuna = model_optuna.fit(X_train, y_train).score(X_test, y_test)

# Hyperopt values are cast to int before they reach the model.
hyperopt_params = {name: int(best_hyperopt[name]) for name in param_names}
model_hyperopt = RandomForestClassifier(random_state=42, **hyperopt_params)
accuracy_hyperopt = model_hyperopt.fit(X_train, y_train).score(X_test, y_test)

print(f"Dokładność modelu (Optuna): {accuracy_optuna}")
print(f"Dokładność modelu (Hyperopt): {accuracy_hyperopt}")